-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathloader.py
42 lines (42 loc) · 1.76 KB
/
loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python3
import os
import pandas
import datetime
from util import *
def loadDataFile(file_name: str):
    """Read one CSV file and return only its date and closing-price columns.

    The columns named by ``RESOURCE_DATE_KEY`` and ``RESOURCE_CLOSE_KEY`` are
    renamed to "Date" and "Close" respectively; every other column is dropped.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A DataFrame with exactly the columns "Date" and "Close".

    Raises:
        FileNotFoundError: If ``file_name`` does not exist.
        KeyError: If the file lacks either expected column after renaming.
    """
    column_names = {RESOURCE_DATE_KEY: "Date", RESOURCE_CLOSE_KEY: "Close"}
    frame = pandas.read_csv(file_name).rename(columns=column_names)
    return frame[["Date", "Close"]]
def loadCountAndClean(list_file_name: str, count: int):
    """Load closing prices for the companies in a list file and clean them.

    Loads only part of the data when ``count`` is positive, which is useful
    during development if you want to test something fast.

    Every non-blank line of ``list_file_name`` is split on single spaces and
    its third field, lower-cased, is taken as the company code. The resource
    file for a company is ``<cwd>/RESOURCES_DIR/<code>RESOURCE_EXT``. Resource
    files that are missing or that lack the expected columns are reported on
    stdout and skipped.

    Cleaning removes every company whose first or last date has no value, and
    then every remaining date (row) that still has a missing value.

    Args:
        list_file_name: Path of the text file listing the companies.
        count: Stop once this many companies have been loaded successfully.
            A value the running total never equals (such as ``-1``) loads
            every listed company.

    Returns:
        A DataFrame indexed by "Date" with one closing-price column per
        remaining company code.

    Raises:
        ValueError: If no company data could be loaded at all.
        IndexError: If a non-blank line has fewer than three space-separated
            fields.
    """
    resources_dir = os.path.join(os.getcwd(), RESOURCES_DIR)
    frames = []
    with open(list_file_name) as f:
        for line in f:
            if not line.strip():
                # A blank (typically trailing) line carries no company code.
                continue
            company_code = line.rstrip('\n').split(" ")[2].lower()
            resource_path = os.path.join(resources_dir, company_code + RESOURCE_EXT)
            try:
                company_data = loadDataFile(resource_path)
            except FileNotFoundError:
                print("File not found:" + resource_path)
                continue
            except KeyError:
                print("Malformed data:" + resource_path)
                continue
            # assign() returns a new frame, so the column-subset frame coming
            # back from loadDataFile is never modified in place.
            frames.append(company_data.assign(Symbol=company_code))
            # Only successfully loaded companies count towards the limit.
            if len(frames) == count:
                break

    if not frames:
        raise ValueError("No company data could be loaded from " + list_file_name)

    data = pandas.concat(frames)
    # Keyword arguments are required here: pandas 2.0 made the parameters of
    # DataFrame.pivot keyword-only, and older versions accept keywords too.
    data = data.pivot(index='Date', columns='Symbol', values='Close').reset_index()
    # data.columns[1:] is a separate Index object, so deleting columns from
    # `data` inside the loop does not disturb the iteration.
    for code in data.columns[1:]:  # first column is "Date"
        missing = data[code].isnull()
        if missing.iloc[0] or missing.iloc[-1]:
            print("Removing data for index "+code+" because only part of the time window is avaliable")
            del data[code]
    print("Removing rows with partial data...")
    data = data.dropna()
    print("Done")
    data = data.set_index("Date")
    return data
def loadAllAndClean(list_file_name: str):
    """Load and clean the data of every company named in ``list_file_name``.

    Delegates to ``loadCountAndClean`` with a count of -1.

    Args:
        list_file_name: Path of the text file listing the companies.

    Returns:
        Whatever ``loadCountAndClean`` returns for that file.
    """
    # A list length is never negative, so -1 never matches the count cut-off.
    no_limit = -1
    return loadCountAndClean(list_file_name, no_limit)