-
Notifications
You must be signed in to change notification settings - Fork 129
/
Copy pathStocks.py
67 lines (44 loc) · 1.82 KB
/
Stocks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 30 19:29:03 2014
@author: francesco
"""
#import pandas as pd
#import matplotlib.pyplot as plt
#import numpy as np
#from Project import loadDatasets
from functions import *
# Experiment driver: for every feature-window size, rebuild the feature set
# from the raw datasets, merge them into one frame, clean up missing values
# and run the selected prediction pipeline on the result.

# Which pipeline runs on the assembled frame: 'CLASSIFICATION' or 'REGRESSION'.
target = 'CLASSIFICATION'
#target = 'REGRESSION'

# Time lags handed to applyTimeLag (range(2, 3) yields the single lag 2).
lags = range(2, 3)


def _report(label, value):
    """Print ``label`` and ``value`` separated by a single space.

    Produces the same text as the statement form ``print label, value`` while
    staying valid syntax under both Python 2 and Python 3.
    """
    print('%s %s' % (label, value))


_report('Maximum time lag applied', max(lags))
print('')

for maxdelta in range(3, 12):
    # Reloaded on every pass: addFeatures is called only for its effect on
    # the frames (its return value is unused), so presumably it adds columns
    # in place and a fresh copy is needed for each window size.
    datasets = loadDatasets('/home/francesco/Dropbox/DSR/Project/datasets')

    delta = range(2, maxdelta)
    widest = max(delta)  # loop-invariant, computed once per pass
    _report('Delta days accounted: ', widest)

    for position, dataset in enumerate(datasets):
        columns = dataset.columns
        # The last two columns are passed to addFeatures as the adjusted
        # close and the returns columns.
        adjclose = columns[-2]
        returns = columns[-1]
        for n in delta:
            addFeatures(dataset, adjclose, returns, n)
        # Computation of returns and moving means introduces NaN in the
        # leading rows, which are now removed. The trimmed frame has to be
        # stored back into the list: rebinding the loop variable alone would
        # leave `datasets` untouched.
        # NOTE(review): assumes loadDatasets returns a mutable sequence
        # (list) of DataFrames - confirm.
        datasets[position] = dataset.iloc[widest:, :]

    finance = mergeDataframes(datasets, 6)
    _report('Size of data frame: ', finance.shape)
    _report('Number of NaN after merging: ', count_missing(finance))

    # First fill gaps by interpolating along the time index ...
    finance = finance.interpolate(method='time')
    _report('Number of NaN after time interpolation: ',
            finance.shape[0]*finance.shape[1] - finance.count().sum())

    # ... then replace whatever is still missing with the column mean.
    finance = finance.fillna(finance.mean())
    _report('Number of NaN after mean interpolation: ',
            (finance.shape[0]*finance.shape[1] - finance.count().sum()))

    # Shift the S&P 500 return column by `back` rows (a negative shift moves
    # later values to earlier rows) before the lagged features are built.
    back = -1
    finance.Return_SP500 = finance.Return_SP500.shift(back)

    finance = applyTimeLag(finance, lags, delta, back)

    _report('Number of NaN after temporal shifting: ', count_missing(finance))
    _report('Size of data frame after feature creation: ', finance.shape)

    # 0.8 is the second argument of both pipelines.
    # NOTE(review): looks like the train/test split fraction - confirm
    # against functions.py.
    if target == 'CLASSIFICATION':
        performClassification(finance, 0.8)
        print('')
    elif target == 'REGRESSION':
        performRegression(finance, 0.8)
        print('')