-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_cleaned_up.py
86 lines (67 loc) · 3 KB
/
predict_cleaned_up.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 3 20:26:00 2023
@author: umroot
"""
import pandas as pd
import numpy as np
from smt.surrogate_models import QP, RBF, IDW
import joblib
def preprocess_date(date_val):
day = date_val.day
month = date_val.month
year = date_val.year
return day, month, year
def preprocess_year(year_val):
# Calculate the simplified year representation
first_year = 2012 # Replace this with the actual first year in your dataset
simplified_year = year_val - first_year
return simplified_year
def build_model(file_path, sheet_name, date_column_name, us_eu_column_name, model_type='RBF', **model_options):
# Read the specific sheet of the Excel file into a DataFrame
df = pd.read_excel(file_path, sheet_name=sheet_name, usecols=[date_column_name, us_eu_column_name])
# Preprocess the date column to extract day, month, and year
df['Day'], df['Month'], df['Year'] = zip(*df[date_column_name].apply(preprocess_date))
# Preprocess the year column to get the simplified representation
df['Simplified_Year'] = df['Year'].apply(preprocess_year)
# Drop the original date and year columns
df.drop(columns=[date_column_name, 'Year'], inplace=True)
# Rename the US_EU_Column to Exchange_Rate
df.rename(columns={us_eu_column_name: 'Exchange_Rate'}, inplace=True)
# Drop rows with NaN values
df.dropna(inplace=True)
# Select the input features (Day, Month, Simplified_Year)
X = df[['Day', 'Month', 'Simplified_Year']].values.astype(np.float64)
# Select the target variable (Exchange_Rate)
y = df['Exchange_Rate'].values.astype(np.float64)
# Choose the model type and initialize the model
if model_type == 'QP':
sm = QP(**model_options)
elif model_type == 'RBF':
sm = RBF(**model_options)
elif model_type == 'IDW':
sm = IDW(**model_options)
else:
raise ValueError("Invalid model_type. Please choose from 'QP', 'RBF', or 'IDW'.")
# Set the training values and train the model
sm.set_training_values(X, y)
sm.train()
return sm
def test_model(model, test_input):
# Convert test input to a 2D array and set its dtype to float64
test_input_array = np.array([test_input], dtype=np.float64)
# Make predictions using the loaded model
output = model.predict_values(test_input_array)
return output
if __name__ == "__main__":
file_path = 'euusd.xls'
sheet_name = 'Daily_1'
date_column_name = 'observation_date'
us_eu_column_name = 'selected'
# Build the RBF model (you can change the model_type and options accordingly)
model = build_model(file_path, sheet_name, date_column_name, us_eu_column_name, model_type='QP')
joblib.dump(model, 'surrogate_model_with_QP.joblib')
# Test the model with a test_input
test_input = [4, 8, 11] # Expected density is 65.04632
output = test_model(model, test_input)
print("Predicted Output:", output)