import pandas as pd
import numpy as np
from scipy.optimize import minimize
from typing import Annotated
from openai import OpenAI

def sp500_fundaments(file_path: Annotated[str, "File with stock fundamentals data"]):
    # The OpenAI client defaults to os.environ.get("OPENAI_API_KEY")
    client = OpenAI()
    # Step 1: Create a vector store to hold the fundamentals file
    vector_store = client.beta.vector_stores.create(name="Fundamental Stock Data")
    print("Vector Store created:", vector_store.id)  # This is your vector_store.id
    # Step 2: Open the files for upload
    file_paths = [file_path]
    file_streams = [open(path, "rb") for path in file_paths]
    try:
        # Step 3: Upload the files to the vector store and poll until processing finishes
        file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
            vector_store_id=vector_store.id, files=file_streams
        )
    finally:
        # Close the file handles whether or not the upload succeeded
        for stream in file_streams:
            stream.close()
    # Step 4: Verify completion (optional)
    print("File batch status:", file_batch.status)
    return vector_store.id

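# Example (sketch): build a vector store from a local fundamentals file.
# Assumes OPENAI_API_KEY is set; the filename below is a placeholder.
# vs_id = sp500_fundaments('sp500_stock_data_fundaments.txt')
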
def calc_cov_matrix(file_path: Annotated[str, "File with stock price data"], tickers: Annotated[list, "List of stock tickers"], date: Annotated[str, "The base date in 'YYYY-MM-DD' format"]):
    # Read the CSV file into a DataFrame
    data = pd.read_csv(file_path)
    # Convert the 'Date' column and the given date to datetime
    data['Date'] = pd.to_datetime(data['Date'])
    date = pd.to_datetime(date)
    # Keep only the selected stocks
    stock_data = data[data['Ticker'].isin(tickers)]
    # Keep the 30 calendar days up to and including the given date
    start_date = date - pd.Timedelta(days=30)
    filtered_data = stock_data[(stock_data['Date'] > start_date) & (stock_data['Date'] <= date)]
    # Pivot so that dates are rows and tickers are columns
    pivot_data = filtered_data.pivot(index='Date', columns='Ticker', values='Close')
    # Daily simple returns (named to avoid shadowing the returns() helper below)
    daily_returns = pivot_data.pct_change().dropna()
    # Covariance matrix of the daily returns
    covariance_matrix = daily_returns.cov()
    return covariance_matrix

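# Example (sketch): covariance of NVDA and UAL daily returns over the trailing
# 30 calendar days; the CSV path mirrors the commented calls at the bottom of
# this file and must contain 'Ticker', 'Date' and 'Close' columns.
# cov = calc_cov_matrix('sp500_stock_data.csv', ['NVDA', 'UAL'], '2024-08-05')
# print(cov.loc['NVDA', 'UAL'])  # off-diagonal entry: their return covariance
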
def portfolio_volatility(file_path: Annotated[str, "File with stock price data"], tickers: Annotated[list, "List of stock tickers"], date: Annotated[str, "The base date in 'YYYY-MM-DD' format"], weights: Annotated[list, "Weights of stocks in a portfolio"]) -> Annotated[float, "portfolio risk"]:
    # Convert weights from percentages to decimals
    weights = np.array(weights) / 100
    covariance_matrix = calc_cov_matrix(file_path, tickers, date)
    # Portfolio variance: w^T * Sigma * w
    portfolio_variance = np.dot(weights.T, np.dot(covariance_matrix, weights))
    # Volatility is the standard deviation, i.e. the square root of the variance
    return np.sqrt(portfolio_variance)

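# Example (sketch): daily volatility of an equal-weight NVDA/UAL portfolio.
# Weights are percentages, as in the commented calls at the bottom of this file;
# internally this evaluates sqrt(w.T @ Sigma @ w) with w = [0.5, 0.5].
# vol = portfolio_volatility('sp500_stock_data.csv', ['NVDA', 'UAL'], '2024-08-05', [50, 50])
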
def returns(file_path: Annotated[str, "File with stock price data"], tickers: Annotated[list, "List of stock tickers"], date: Annotated[str, "The base date in 'YYYY-MM-DD' format"], weights: Annotated[list, "Weights of stocks in a portfolio"]) -> Annotated[float, "portfolio return"]:
    """
    Compute the one-day portfolio log return as of the given date.

    Parameters:
        file_path (str): Path to the CSV file containing columns ['Ticker', 'CUSIP', 'Date', 'Open', 'High', 'Low', 'Close'].
        tickers (list): List of stock tickers.
        date (str): The base date in 'YYYY-MM-DD' format.
        weights (list): Portfolio weights in percent, aligned with tickers.

    Returns:
        float: The weighted sum of each ticker's most recent daily log return.
    """
    # Load data from the CSV file and make sure the Date column is a datetime type
    df = pd.read_csv(file_path)
    df['Date'] = pd.to_datetime(df['Date'])
    # Dictionary mapping each ticker to its log return (or an error message)
    results = {}
    for ticker in tickers:
        # Skip tickers that do not appear in the data
        if ticker not in df['Ticker'].unique():
            results[ticker] = f"Ticker '{ticker}' not found in the data."
            continue
        # Filter the dataframe for the given ticker and sort by date
        ticker_df = df[df['Ticker'] == ticker].sort_values(by='Date')
        # Start with a one-day window ending at the given date
        end_date = pd.to_datetime(date)
        start_date = end_date - pd.DateOffset(days=1)
        last_2_days = ticker_df[(ticker_df['Date'] >= start_date) & (ticker_df['Date'] <= end_date)]
        # Widen the window one day at a time until it holds two observations
        # (to step over weekends and holidays), stopping at the earliest date
        # available so the loop cannot run forever on sparse data
        earliest = ticker_df['Date'].min()
        while last_2_days.shape[0] < 2 and start_date > earliest:
            start_date -= pd.DateOffset(days=1)
            last_2_days = ticker_df[(ticker_df['Date'] >= start_date) & (ticker_df['Date'] <= end_date)]
        if last_2_days.shape[0] < 2:
            results[ticker] = f"Not enough price history for '{ticker}' on or before {date}."
            continue
        # Daily log return between the first and last close in the window
        results[ticker] = np.log(last_2_days['Close'].iloc[-1] / last_2_days['Close'].iloc[0])
    # Convert percentage weights to decimals (matching the other helpers) and
    # sum the weighted returns, skipping tickers that produced an error message
    portfolio_return = sum(results[t] * (w / 100) for t, w in zip(tickers, weights) if not isinstance(results[t], str))
    return portfolio_return

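# Example (sketch), mirroring the commented call at the bottom of this file:
# the one-day log return of a 50/0/50 NVDA/SCHO/UAL portfolio as of 2024-08-05.
# r = returns('sp500_stock_data.csv', ['NVDA', 'SCHO', 'UAL'], '2024-08-05', [50, 0, 50])
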
def optimize_for_target_risk(file_path: Annotated[str, "File with stock price data"], tickers: Annotated[list, "List of stock tickers"], date: Annotated[str, "The base date in 'YYYY-MM-DD' format"], weights: Annotated[list, "Weights of stocks in a portfolio"], target_risk: Annotated[float, "Target for portfolio risk"]):
    # Convert weights from percentages to decimals
    weights = np.array(weights) / 100
    initial_weights = weights
    covariance_matrix = calc_cov_matrix(file_path, tickers, date)
    # Objective: hit the target volatility while staying close to the initial
    # weights; the factors 100 and 1/120 set the relative strength of the two
    # penalty terms (volatility mismatch vs. drift from the starting weights)
    def objective(weights, initial_weights, covariance_matrix):
        portfolio_variance = np.dot(weights, np.dot(covariance_matrix, weights))
        portfolio_volatility = np.sqrt(portfolio_variance)
        return 100 * (portfolio_volatility - target_risk) ** 2 + np.sum((weights - initial_weights) ** 2) / 120
    # Constraint: weights must sum to 1
    constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})
    # Bounds: each weight between 0 and 1 (long-only, no leverage)
    bounds = [(0, 1) for _ in tickers]
    # SLSQP supports both the equality constraint and the box bounds
    result = minimize(objective, weights, args=(initial_weights, covariance_matrix), method='SLSQP', bounds=bounds, constraints=constraints)
    # Convert the optimized weights back to percentages
    return result.x * 100

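# Example (sketch): rebalance a 50/0/50 NVDA/SCHO/UAL portfolio toward a 2%
# daily volatility target; the 0.02 target here is illustrative only.
# new_w = optimize_for_target_risk('sp500_stock_data.csv', ['NVDA', 'SCHO', 'UAL'], '2024-08-05', [50, 0, 50], 0.02)
# print(new_w)  # optimized weights, in percent, summing to 100
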
#print(returns('D:/Witold/Documents/Computing/LLMAgentsOfficial/Hackathon/sp500_stock_data.csv',['NVDA','SCHO','UAL'],'2024-08-05',[50, 0, 50]))
#print(sp500_fundaments('D:/Witold/Documents/Computing/LLMAgentsOfficial/Hackathon/sp500_stock_data_fundaments.txt'))