-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEMMISION PREDICTION MODEL
198 lines (158 loc) · 6.87 KB
/
EMMISION PREDICTION MODEL
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# India-specific SO₂ emission prediction model (Prophet + XGBoost hybrid)
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from prophet import Prophet
import os
# -------------------------------
# 1. Data Generation & Preprocessing (Indian Standards)
# -------------------------------
def generate_data(n_samples=1000, output_path='so2_data_india.csv', seed=42):
    """
    Generate synthetic SO₂ emission data based on Indian standards and save it to CSV.

    Every row gets a random fuel type, a random month-end date between
    January 2000 and December 2023, a random region, and three values drawn
    uniformly from bounds that depend on the fuel type.

    Args:
        n_samples: Number of rows to generate.
        output_path: Destination CSV file (written without the index).
        seed: Seed for NumPy's global random generator. Pass ``None`` to
            leave the generator state untouched.

    Returns:
        None. The data is written to ``output_path``.
    """
    # Uniform sampling bounds per fuel type, in the order
    # (energy_output, so2_emissions, fuel_cost). Units per the original
    # inline notes: SO₂ in mg/Nm³, fuel cost in INR per ton.
    fuel_profiles = {
        'Domestic Coal': ((300, 1500), (100, 600), (2000, 5000)),
        'Imported Coal': ((500, 2000), (50, 300), (3000, 7000)),
        'Lignite': ((200, 1000), (400, 800), (1000, 3000)),
        'Natural Gas': ((100, 800), (10, 100), (4000, 9000)),
    }
    regions = ['North', 'South', 'East', 'West']

    if seed is not None:
        np.random.seed(seed)

    # Month-end dates from 2000 to 2023. An explicit offset object is used
    # instead of the 'ME' alias, which only exists in pandas >= 2.2.
    dates = pd.date_range(start='2000-01-01', end='2023-12-31',
                          freq=pd.offsets.MonthEnd())

    fuel_choice = np.random.choice(list(fuel_profiles), n_samples)

    # Draw energy, SO₂ and cost row by row (in that order) so the random
    # stream is consumed exactly as a per-row append loop would consume it.
    draws = [
        [np.random.uniform(low, high) for low, high in fuel_profiles[str(fuel)]]
        for fuel in fuel_choice
    ]
    # reshape keeps the three-column layout even when n_samples is 0.
    samples = np.array(draws, dtype=float).reshape(-1, 3)

    df = pd.DataFrame({
        'date': np.random.choice(dates, n_samples),
        'fuel_type': fuel_choice,
        'energy_output': samples[:, 0],
        'so2_emissions': samples[:, 1],
        'fuel_cost': samples[:, 2],
        'region': np.random.choice(regions, n_samples),
    })
    df.to_csv(output_path, index=False)
    print(f"Synthetic data generated for Indian standards and saved as '{output_path}'.")
def preprocess_data(path='so2_data_india.csv'):
    """
    Load the emissions CSV and derive the calendar, lag and rolling features.

    Rows are put into chronological order before any lag or rolling feature
    is computed, so "previous row" really means "previous observation in
    time" and a positional train/test split of the result is a temporal one.

    Args:
        path: CSV file with at least the columns ``date``, ``fuel_type``,
            ``region``, ``fuel_cost`` and ``so2_emissions``.

    Returns:
        DataFrame sorted by ``date`` with a fresh 0..n-1 index and the added
        columns ``year``, ``month``, ``fuel_cost_lag1`` and
        ``so2_rolling_avg``.
    """
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'])

    # shift() and rolling() work on row position, so the rows must be in
    # time order first. mergesort is stable: rows sharing a date keep their
    # file order.
    df = df.sort_values('date', kind='mergesort').reset_index(drop=True)

    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month

    # Previous fuel cost observed for the same fuel type.
    df['fuel_cost_lag1'] = df.groupby('fuel_type')['fuel_cost'].shift(1)
    # Mean SO₂ over the last 12 rows of the same region.
    # NOTE(review): the window includes the current row's so2_emissions, and
    # the fallback below copies so2_emissions outright for rows with fewer
    # than 12 observations, so this feature leaks the regression target.
    # Confirm whether a window over strictly earlier rows was intended.
    df['so2_rolling_avg'] = df.groupby('region')['so2_emissions'].transform(
        lambda x: x.rolling(12).mean()
    )

    # Rows without enough history fall back to their own current value.
    df['fuel_cost_lag1'] = df['fuel_cost_lag1'].fillna(df['fuel_cost'])
    df['so2_rolling_avg'] = df['so2_rolling_avg'].fillna(df['so2_emissions'])
    return df
# -------------------------------
# 2. Hybrid Model (Prophet + XGBoost)
# -------------------------------
class HybridModel:
    """
    Two-stage SO₂ regressor: Prophet fits emissions over time, then XGBoost
    fits the residuals Prophet leaves behind using the tabular features.

    A prediction is Prophet's ``yhat`` for the row's date plus the XGBoost
    residual estimate for the row's features.
    """

    def __init__(self):
        self.prophet = Prophet(seasonality_mode='multiplicative')
        self.xgb = XGBRegressor(n_estimators=300, learning_rate=0.01)
        # Built in fit(); stays None until the model has been trained.
        self.preprocessor = None

    def _prophet_yhat(self, dates):
        """
        Return Prophet's ``yhat`` for every entry of ``dates``, in input order.

        Prophet.predict returns its forecast sorted by ``ds`` rather than in
        the caller's row order, so the forecast is looked up per date instead
        of being combined with the rows by position.
        """
        ds = pd.Series(pd.to_datetime(dates).to_numpy())
        unique_ds = ds.drop_duplicates().sort_values()
        forecast = self.prophet.predict(pd.DataFrame({'ds': unique_ds.to_numpy()}))
        yhat_by_date = pd.Series(
            forecast['yhat'].to_numpy(),
            index=pd.to_datetime(forecast['ds']).to_numpy(),
        )
        return ds.map(yhat_by_date).to_numpy()

    def fit(self, df):
        """
        Train both stages on ``df``.

        Args:
            df: DataFrame with ``date``, ``so2_emissions`` and the feature
                columns ``year``, ``month``, ``fuel_cost``, ``fuel_cost_lag1``,
                ``so2_rolling_avg``, ``fuel_type`` and ``region``. Columns not
                named in the preprocessor are ignored.
        """
        prophet_df = df[['date', 'so2_emissions']].rename(
            columns={'date': 'ds', 'so2_emissions': 'y'}
        )
        self.prophet.fit(prophet_df)

        # Residual target for XGBoost: actual value minus Prophet's fit,
        # aligned row by row.
        y_residuals = df['so2_emissions'].to_numpy() - self._prophet_yhat(df['date'])

        X = df.drop(['so2_emissions', 'date'], axis=1)
        self.preprocessor = ColumnTransformer([
            ('num', StandardScaler(), ['year', 'month', 'fuel_cost', 'fuel_cost_lag1', 'so2_rolling_avg']),
            ('cat', OneHotEncoder(handle_unknown='ignore'), ['fuel_type', 'region'])
        ])
        X_processed = self.preprocessor.fit_transform(X)
        self.xgb.fit(X_processed, y_residuals)
        print("Hybrid model trained successfully.")

    def predict(self, df):
        """
        Predict SO₂ emissions for each row of ``df``.

        Args:
            df: DataFrame with ``date`` and the feature columns used in fit().
                A ``so2_emissions`` column is dropped if present.

        Returns:
            NumPy array with one prediction per row, in the row order of ``df``.
        """
        trend = self._prophet_yhat(df['date'])
        X = df.drop(['date', 'so2_emissions'], axis=1, errors='ignore')
        X_processed = self.preprocessor.transform(X)
        xgb_pred = self.xgb.predict(X_processed)
        return trend + xgb_pred
# -------------------------------
# 3. Training & Saving the Model
# -------------------------------
def train_model():
    """
    Train the hybrid model, report its hold-out MAE and pickle it.

    The synthetic dataset is generated first if its CSV is missing. The
    first 80% of the preprocessed rows are used for fitting and the
    remaining rows for evaluation.
    """
    data_file = 'so2_data_india.csv'
    model_file = 'so2_trained_model_india.pkl'

    # Create the dataset on first use.
    if not os.path.exists(data_file):
        generate_data()

    frame = preprocess_data()

    # Positional 80/20 split.
    cutoff = int(0.8 * len(frame))
    train_part = frame.iloc[:cutoff]
    holdout = frame.iloc[cutoff:]

    hybrid = HybridModel()
    hybrid.fit(train_part)

    actual = holdout['so2_emissions']
    predicted = hybrid.predict(holdout)
    error = mean_absolute_error(actual, predicted)
    print(f"Model Evaluation: MAE = {error:.2f}")

    joblib.dump(hybrid, model_file)
    print("Trained model saved as 'so2_trained_model_india.pkl'.")
# -------------------------------
# 4. User Input & Prediction
# -------------------------------
def get_user_input():
    """
    Collect user input for prediction and return it as a DataFrame.

    Prompts, in order, for fuel type, region, year, month and fuel cost.
    Fuel type and region are matched case-insensitively against the names
    listed in the prompts; anything else is passed through unchanged.

    Returns:
        One-row DataFrame with the columns ``date``, ``fuel_type``,
        ``region``, ``year``, ``month``, ``fuel_cost``, ``fuel_cost_lag1``
        and ``so2_rolling_avg``.

    Raises:
        ValueError: If year, month or fuel cost is not a number, or month is
            outside 1-12.
    """
    fuel_types = ('Domestic Coal', 'Imported Coal', 'Lignite', 'Natural Gas')
    regions = ('North', 'South', 'East', 'West')
    fuel_lookup = {name.lower(): name for name in fuel_types}
    region_lookup = {name.lower(): name for name in regions}

    # Normalise the spelling of known categories: the one-hot encoder used
    # by the model ignores unknown values, so "lignite" would otherwise be
    # encoded as "no fuel type at all" without any error.
    fuel_type = input("Enter fuel type (Domestic Coal, Imported Coal, Lignite, Natural Gas): ").strip()
    fuel_type = fuel_lookup.get(fuel_type.lower(), fuel_type)
    region = input("Enter region (North, South, East, West): ").strip()
    region = region_lookup.get(region.lower(), region)

    year = int(input("Enter year: ").strip())
    month = int(input("Enter month (1-12): ").strip())
    if not 1 <= month <= 12:
        raise ValueError(f"month must be between 1 and 12, got {month}")
    fuel_cost = float(input("Enter fuel cost (INR per ton): ").strip())

    # Last calendar day of the requested month, matching the month-end
    # dates of the dataset. MonthEnd(0) rolls the 1st forward to that day.
    date = pd.Timestamp(year=year, month=month, day=1) + pd.offsets.MonthEnd(0)

    data = pd.DataFrame({
        'date': [date],
        'fuel_type': [fuel_type],
        'region': [region],
        'year': [year],
        'month': [month],
        'fuel_cost': [fuel_cost],
        'fuel_cost_lag1': [fuel_cost],  # Assuming current cost for lag1
        'so2_rolling_avg': [100]  # Placeholder, should ideally be based on past data
    })
    return data
def predict_from_user_input():
    """
    Load the pickled model, prompt for one set of inputs and print the
    predicted SO₂ emission.

    Prints a hint and returns without prompting when no trained model file
    exists.
    """
    model_file = 'so2_trained_model_india.pkl'

    if not os.path.exists(model_file):
        print("No trained model found. Please train the model first.")
        return

    trained = joblib.load(model_file)
    print("Model loaded successfully!")

    # One-row feature frame built from the interactive answers.
    features = get_user_input()
    estimates = trained.predict(features)
    first_estimate = estimates[0]

    print("\nPrediction Results:")
    print(f"Predicted SO₂ Emissions: {first_estimate:.2f} mg/Nm³")
if __name__ == "__main__":
    # Minimal text menu: show the options, then dispatch on the answer.
    for menu_line in ("Select an option:", "1: Train Model", "2: Predict SO₂ Emission"):
        print(menu_line)

    selection = input("Enter 1 or 2: ").strip()

    # Each accepted answer maps to the routine it runs.
    handlers = {"1": train_model, "2": predict_from_user_input}
    handler = handlers.get(selection)
    if handler is None:
        print("Invalid option. Exiting.")
    else:
        handler()