-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmy_app4.py
144 lines (119 loc) · 5.85 KB
/
my_app4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
import pickle
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
# Show the logo image at the top of the app.
st.image(
    "/Users/ebmac/Documents/Clarusway/DS/MLD/Project/car.jpeg",
    caption="Auto Scout Logo",
    use_column_width=True,
)
st.title("Auto Scout Model Prediction with Visual Enhancements")

# Load the dataset that backs both training and the input widgets.
df = pd.read_csv("Ready_to_ML.csv")

# Registry of the available regressors, keyed by the name shown in the UI.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "Support Vector Regressor": SVR(),
}

# Sidebar: pick which of the registered models is used for prediction.
st.sidebar.title("Model Selection and Training")
model_choice = st.sidebar.selectbox(
    "Choose Model for Prediction",
    list(models),  # dict order == display order
)
# Function to train and evaluate multiple models
def train_and_compare_models():
    """Fit every regressor in ``models`` and collect its error metrics.

    Reads the module-level ``df`` and ``models``. Each regressor is wrapped
    in a pipeline that standardizes the numeric columns and one-hot encodes
    ``make_model``, is fitted on the training part of a 70/30 split, and is
    then pickled to ``<name>_pipeline.pkl`` (name lower-cased, spaces
    replaced by underscores) in the current working directory.

    Returns:
        dict: model name -> ``{"MSE", "R-squared", "Cross-validation MSE"}``.
        The first two are measured on the held-out test split; the last is
        the mean 5-fold MSE on the training split.
    """
    numeric_cols = ['age', 'mileage', 'engine_size']
    categorical_cols = ['make_model']

    features = df[categorical_cols + numeric_cols]
    target = df['price']

    # handle_unknown='ignore' keeps prediction from failing on a category
    # that was not present in the training split.
    column_prep = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_cols),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ]
    )

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=42
    )

    results = {}
    for name, estimator in models.items():
        fitted = Pipeline(
            steps=[('preprocessor', column_prep), ('model', estimator)]
        )
        fitted.fit(X_train, y_train)
        holdout_pred = fitted.predict(X_test)

        # scikit-learn reports negated MSE so that "greater is better";
        # the sign is flipped back before averaging.
        neg_mse_folds = cross_val_score(
            fitted, X_train, y_train, cv=5, scoring='neg_mean_squared_error'
        )

        results[name] = {
            "MSE": mean_squared_error(y_test, holdout_pred),
            "R-squared": r2_score(y_test, holdout_pred),
            "Cross-validation MSE": np.mean(-neg_mse_folds),
        }

        # Persist the whole pipeline (preprocessing + model), not just the
        # estimator, so raw feature rows can be fed to it at predict time.
        file_stem = name.replace(' ', '_').lower()
        with open(f"{file_stem}_pipeline.pkl", "wb") as file:
            pickle.dump(fitted, file)

    return results
# Display model performance comparison with bar plot
def display_model_performance(model_performance):
    """Render a bar chart comparing the MSE of each trained model.

    Args:
        model_performance: Mapping of model name to a metrics dict whose
            values are, in order, MSE, R-squared and cross-validation MSE
            (the shape returned by ``train_and_compare_models``). The
            columns are renamed positionally, so that order matters.
    """
    performance_df = pd.DataFrame(model_performance).T.reset_index()
    performance_df.columns = ['Model', 'MSE', 'R-squared', 'Cross-validation MSE']

    # Bar plot for MSE
    st.write("### Model MSE Comparison:")
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        sns.barplot(x='Model', y='MSE', data=performance_df, ax=ax)
        ax.set_title("Model MSE Comparison")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed.
        # The Streamlit script reruns inside one long-lived process, so an
        # unclosed figure would accumulate on every training run.
        plt.close(fig)
# Retrain all models on demand and chart how they compare.
train_clicked = st.sidebar.button("Train and Compare Models")
if train_clicked:
    with st.spinner("Training models..."):
        model_performance = train_and_compare_models()
    st.sidebar.success("Model training completed!")
    st.write("### Model Performance Comparison:")
    display_model_performance(model_performance)

# Prediction input widgets; the values are read by the prediction step below.
st.write("### Predict Car Price")
known_make_models = df['make_model'].unique()
make_model = st.selectbox("Select the model of the car:", known_make_models)
age = st.number_input(
    "Enter the age of the car:",
    min_value=0,
    max_value=30,
    value=5,
)
mileage = st.number_input(
    "Enter the mileage of the car:",
    min_value=0,
    max_value=300000,
    value=50000,
)
engine_size = st.number_input(
    "Enter the engine size of the car (in liters):",
    min_value=0.0,
    max_value=8.0,
    value=2.0,
)
def load_pipeline(model_choice):
    """Return the fitted preprocessing + model pipeline for ``model_choice``.

    The pipeline is read from ``<model_choice>_pipeline.pkl`` (lower-cased,
    spaces replaced by underscores) in the current working directory. If the
    file does not exist yet, all models are trained once via
    ``train_and_compare_models`` (which writes the pipeline files) and the
    freshly written file is loaded.

    Args:
        model_choice: Display name of the model, e.g. ``"Random Forest"``.

    Returns:
        The unpickled pipeline object.

    Raises:
        FileNotFoundError: If the pipeline file is still missing after
            training.
    """
    pipeline_path = f"{model_choice.replace(' ', '_').lower()}_pipeline.pkl"

    def _read_pipeline():
        # NOTE: unpickling executes arbitrary code; only load files that
        # this application wrote itself.
        with open(pipeline_path, "rb") as file:
            return pickle.load(file)

    try:
        return _read_pipeline()
    except FileNotFoundError:
        # Training saves the complete pipeline for every model. Reload that
        # file rather than returning the bare estimator from ``models``,
        # which lacks the preprocessing step and cannot score raw rows.
        train_and_compare_models()
        return _read_pipeline()
# Predict the price for the entered car and plot how it varies with age.
if st.button("Predict Price"):
    pipeline = load_pipeline(model_choice)
    feature_columns = ['make_model', 'age', 'mileage', 'engine_size']

    # Single-row frame holding the values entered in the widgets.
    input_data = pd.DataFrame(
        [[make_model, age, mileage, engine_size]],
        columns=feature_columns,
    )
    prediction = pipeline.predict(input_data)
    st.metric(label="Predicted Price", value=f"€{prediction[0]:.2f}")

    # Age sweep (0-29 years) with every other input held at the entered
    # value. All rows are scored in one predict call instead of one call
    # per age; the scalar inputs are broadcast across the rows.
    ages = np.arange(0, 30, 1)
    age_sweep = pd.DataFrame(
        {
            'make_model': make_model,
            'age': ages,
            'mileage': mileage,
            'engine_size': engine_size,
        },
        columns=feature_columns,
    )
    predictions = pipeline.predict(age_sweep)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=ages, y=predictions, mode='lines+markers', name='Price'))
    fig.update_layout(
        title="Price Predictions vs Car Age",
        xaxis_title="Car Age",
        yaxis_title="Predicted Price (€)",
        plot_bgcolor='#e1e4e8',
        paper_bgcolor='#f0f2f6',
    )
    st.plotly_chart(fig)