Skip to content

Commit

Permalink
Support for pandas time series has been improved.
Browse files Browse the repository at this point in the history
Minimal changes applied to df_model.py.
Detection and formatting of pandas time series have been improved in
time_series.py
Examples of pandas time series have been redone in the examples 
directory.
  • Loading branch information
uvemas committed Dec 23, 2019
1 parent 3a588c0 commit 6329c3f
Show file tree
Hide file tree
Showing 9 changed files with 180 additions and 101 deletions.
29 changes: 9 additions & 20 deletions examples/scripts/pandas_timeseries1.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,33 +26,22 @@
import numpy as np
import pandas as pd

## A test series, the deprecated way (not supported by timeseries plugin)
#startdate = pd.Period(freq='D', year=2009, month=1, day=1)
#rng = pd.period_range(startdate, periods=365)
#ts = pd.Series(np.arange(1, 366), index=rng)

## A test series, the recommended way
dti = pd.DatetimeIndex(start='1/1/2009', freq='D', periods=365)
# Create a DataFrame with a DatetimeIndex and linear data
dti = pd.date_range(start='1/1/2019', periods=365, name='Days')
ts = pd.Series(np.arange(1, 366), index=dti)
# Saving this time series in an HDFStore would add two arrays to the h5 file:
# one array with the index and another with the data.
# So we create a data frame in order to store all the information in a table.
df = pd.DataFrame(ts)

# Write to a PyTables file
# Create an empty HDFStore
output_dir = '../timeseries'
hdf5_name = 'pandas_test1.hdf5'
filepath_hdf5 = os.path.join(output_dir, hdf5_name)
try:
os.mkdir(output_dir)
except OSError:
pass
finally:
store = pd.HDFStore(filepath_hdf5)

hdf5_name = 'pandas_test1.hdf5'
filepath_hdf5 = os.path.join(output_dir, hdf5_name)
store = pd.HDFStore(filepath_hdf5)

# The following code creates a group with 1 leaf (Table instance)
# df
# |_ table
store.append('one_column_ts', df)
# Store the dataframe as a PyTables Table under the root group
store.append('', df)
store.close()

31 changes: 12 additions & 19 deletions examples/scripts/pandas_timeseries2.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,26 @@
import os
import datetime

import pandas
import pandas as pd
import pandas_datareader.data as web

startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)
start = datetime.date(2002, 1, 5)
end = datetime.date(2003, 12, 1)

# Retrieve data from Google Finance
# Data format is [(d, [open, high, low, close], volume), ...]
google_f = web.DataReader('F', 'google', startdate, enddate)
# Retrieve inflation data from FRED
inflation = web.DataReader(['CPIAUCSL', 'CPILFESL'], 'fred', start, end)

# Write to a PyTables file
# Create an empty HDFStore
output_dir = '../timeseries'
hdf5_name = 'pandas_test2.hdf5'
filepath_hdf5 = os.path.join(output_dir, hdf5_name)
try:
os.mkdir(output_dir)
except OSError:
pass
finally:
store = pd.HDFStore(filepath_hdf5)

hdf5_name = 'pandas_test2.hdf5'
filepath_hdf5 = os.path.join(output_dir, hdf5_name)
store = pandas.HDFStore(filepath_hdf5)

# The following code stores the information in a Table instance
# intc
# |_ table (field index contains the dates range used as index, field
# values_block0 contains [open, high, low, close, adj close],
# field values_block1 contains volume
# shape is (480,))
store.append('google_f', google_f)
# Store the extracted data as a PyTables Table under the group fred_inflation
store.append('fred_inflation', inflation)
store.close()

44 changes: 19 additions & 25 deletions examples/scripts/pandas_timeseries3.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,37 +23,31 @@

import os

import numpy as np
import numpy.random as nr
import pandas

## a test data frame with 3 columns and 365 rows
index = pandas.date_range('1/1/2009', periods=365, freq='D')
df = pandas.DataFrame(nr.randn(365, 3), index=index, columns=['A', 'B', 'C'])

# Write to a PyTables file
import pandas as pd

# A multiindexed dataframe with 3 columns of data (including a time series)
dti = list(pd.date_range('1/1/2019', periods=365, freq='D'))
ordinal = list(np.arange(1, 6))
iterables = [dti, ordinal]
index = pd.MultiIndex.from_product(iterables, names=['first', 'second'])
d = {'A': nr.randn(1825),
'B': nr.randn(1825),
'C': pd.date_range('1/1/2017', periods=1825)}
df = pd.DataFrame(d, index=index)

# Create an empty HDFStore
output_dir = '../timeseries'
hdf5_name = 'pandas_test3.hdf5'
filepath_hdf5 = os.path.join(output_dir, hdf5_name)
try:
os.mkdir(output_dir)
except OSError:
pass
finally:
store = pd.HDFStore(filepath_hdf5)

hdf5_name = 'pandas_test3.hdf5'
filepath_hdf5 = os.path.join(output_dir, hdf5_name)
store = pandas.HDFStore(filepath_hdf5)

# The following code creates a group with 4 leaves (Array instances)
# df
# |_ axis1 (the row index, a range of dates, shape is (365,))
# |_ axis0 (the column index, shape is (3,))
# |_ block0_values (the random array of values, shape is (365,3))
# |_ block0_items (identical to axis0)
store['df'] = df

# The following code stores the same information in a Table instance
# df_table
# |_ table (field index contains the range of dates used as index, field
# values_block0 contains the random array of values,
# shape is (365,))
# Store the dataframe as a PyTables Table under the group df_table
store.append('df_table', df)
store.close()

Binary file modified examples/timeseries/pandas_test1.hdf5
Binary file not shown.
Binary file modified examples/timeseries/pandas_test2.hdf5
Binary file not shown.
Binary file modified examples/timeseries/pandas_test3.hdf5
Binary file not shown.
1 change: 0 additions & 1 deletion vitables/plugins/timeseries/time_format.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
[Timeseries]
strftime = %c

129 changes: 113 additions & 16 deletions vitables/plugins/timeseries/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,16 @@
except ImportError:
pd = None

from qtpy import QtCore
from qtpy import QtCore, QtGui
from qtpy.QtCore import Qt
from qtpy import QtWidgets

import vitables.utils
from vitables.plugins.timeseries.aboutpage import AboutPage

_axis_font = QtGui.QFont()
_axis_font.setBold(True)

__docformat__ = 'restructuredtext'
__version__ = '2.1'
plugin_name = 'Time series formatter'
Expand Down Expand Up @@ -86,16 +90,24 @@ def findTS(leaf, node_kind):

time_types = ['time32', 'time64']
if isinstance(leaf, tables.Table):
attrs = leaf._v_attrs
asi = leaf._v_attrs
coltypes = leaf.coltypes
pgroup = leaf._g_getparent()

# Check for Pandas timeseries
if pd and hasattr(attrs, 'index_kind') and \
(attrs.index_kind in ('datetime64', 'datetime32')):
return 'pandas_ts'
pandas_attr = getattr(pgroup._v_attrs, 'pandas_type', None)
if pd and pandas_attr in ['frame', 'frame_table']:
userattrs_names = asi._v_attrnamesuser
dtype_attrs = [n for n in userattrs_names if n.endswith('_dtype')]
dtype_attrs.append('index_kind')
for n in dtype_attrs:
if getattr(asi, n).startswith('datetime'):
return 'pandas_ts'

# Check for scikits.timeseries timeseries
if ts and hasattr(attrs, 'CLASS') and \
(attrs.CLASS == 'TimeSeriesTable'):
if ts and hasattr(asi, 'CLASS') and (asi.CLASS == 'TimeSeriesTable'):
return 'scikits_ts'

# Check for PyTables timeseries
for name in leaf.colnames:
if (name in coltypes) and (coltypes[name] in time_types):
Expand All @@ -105,8 +117,8 @@ def findTS(leaf, node_kind):
(leaf.atom.shape == ()) and \
(node_kind != 'vlarray'):
return 'pytables_ts'
else:
return None

return None


def tsPositions(ts_kind, leaf):
Expand All @@ -129,7 +141,26 @@ def tsPositions(ts_kind, leaf):
if (ts_kind == 'scikits_ts'):
positions.append(leaf.coldescrs['_dates']._v_pos)
elif (ts_kind == 'pandas_ts'):
positions.append(leaf.coldescrs['index']._v_pos)
hstore = pd.HDFStore(leaf._v_file.filename)
pgroup = leaf._g_getparent()
df = hstore[pgroup._v_name]

# Inspect dataframe index
df_index = df.index
if isinstance(df_index, pd.MultiIndex):
nlevels = df_index.nlevels
for i, idx in enumerate(df_index.names):
dtime = 'datetime'
if df_index.get_level_values(i).dtype.name.startswith(dtime):
positions.append(i)
elif df_index.dtype.name.startswith('datetime'):
nlevels = 1
positions.append(0)

# Inspect dataframe data
for i, dt in enumerate(df.dtypes):
if dt.name.startswith('datetime'):
positions.append(nlevels + i)
elif ts_kind == 'pytables_ts':
if isinstance(leaf, tables.Table):
for name in leaf.colnames:
Expand Down Expand Up @@ -226,15 +257,19 @@ def customiseModel(self, datasheet):
'ts_format': datetimeFormat(),
}
if isinstance(leaf, tables.Table):
leaf_kind = 'table'
if isinstance(model, vitables.vttables.df_model.DataFrameModel):
leaf_kind = 'dataframe'
else:
leaf_kind = 'table'
else:
leaf_kind = 'array'
model_info = {
'leaf_kind': leaf_kind,
'model': model,
'numrows': model.rowCount(),
'formatContent': model.formatContent,
}
if leaf_kind in ['table', 'array']:
model_info['formatContent'] = model.formatContent

# Add required attributes to model
for k in ts_info:
Expand Down Expand Up @@ -301,13 +336,16 @@ def __init__(self, model_info, ts_info, parent=None):
self.model = model_info['model']
self.numrows = model_info['numrows']
self.ts_cols = ts_info['ts_cols']
self.formatContent = model_info['formatContent']
if 'formatContent' in model_info:
self.formatContent = model_info['formatContent']

self.tsFormatter = self.timeFormatter()

leaf_kind = model_info['leaf_kind']
if leaf_kind == 'table':
self.data = self.table_data
elif leaf_kind == 'dataframe':
self.data = self.df_data
else:
self.data = self.array_data

Expand Down Expand Up @@ -338,6 +376,67 @@ def table_data(self, index, role=QtCore.Qt.DisplayRole):

return None

def df_data(self, index, role=QtCore.Qt.DisplayRole):
    """Return the data stored under `role` for the dataframe cell at `index`.

    This is an overridden method (it stands in for the model's `data`
    method when the leaf is displayed as a dataframe).

    Cells are classified by their position in the view:

    - cells whose column is lower than the number of index columns take
      their text from the index of the current chunk
    - cells for which `start + row` is lower than the number of header
      rows take their text from the column labels of the current chunk
    - cells belonging to both regions are empty
    - any other cell is a data value read positionally from the chunk

    Index and data values whose view column is listed in `self.ts_cols`
    are rendered with `self.tsFormatter`; everything else with `str`.

    :Parameters:

    - `index`: the index of a data item
    - `role`: the role being returned

    :Returns: the display text, the axis font or the alignment flags for
        the cell, or None when nothing applies to the given role
    """
    row = index.row()
    col = index.column()
    # NOTE(review): presumably (rows used by column labels, columns used
    # by the index) -- confirm against DataFrameModel._nheaders.
    header_rows, index_cols = self.model._nheaders
    chunk = self.model._chunk

    # Guard clauses: invalid index or row outside the displayed range.
    if not index.isValid():
        return None
    if not 0 <= row < (self.numrows + header_rows):
        return None

    in_index_area = col < index_cols
    in_header_area = (self.model.start + row) < header_rows

    # The corner shared by the index columns and the header rows is empty.
    if in_index_area and in_header_area:
        return None

    if in_index_area:
        if role == Qt.DisplayRole:
            label = chunk.index[row - header_rows]
            if index_cols > 1:
                # Several index columns: pick this column's component.
                label = label[col]
            if col in self.ts_cols:
                return self.tsFormatter(label)
            return str(label)
        elif role == Qt.FontRole:
            return _axis_font
        elif role == Qt.TextAlignmentRole:
            return int(Qt.AlignRight | Qt.AlignVCenter)
        return None

    if in_header_area:
        if role == Qt.DisplayRole:
            label = chunk.columns[col - index_cols]
            if header_rows > 1:
                # Several header rows: pick this row's component.
                label = label[row]
            return str(label)
        elif role == Qt.FontRole:
            return _axis_font
        elif role == Qt.TextAlignmentRole:
            return int(Qt.AlignCenter | Qt.AlignBottom)
        return None

    # Plain data cell: only the display role is served, no explicit
    # font or alignment is supplied.
    if role != Qt.DisplayRole:
        return None
    cell = self.model._chunk.iat[row - header_rows, col - index_cols]
    if col in self.ts_cols:
        return self.tsFormatter(cell)
    return str(cell)

def array_data(self, index, role=QtCore.Qt.DisplayRole):
"""Returns the data stored under the given role for the item
referred to by the index.
Expand Down Expand Up @@ -400,10 +499,8 @@ def formatPandasTS(self, content):
:Parameter content: the content of the table cell being formatted
"""
# ImportError if pandas not installed!
date = pd.Timestamp(int(content))
try:
return date.strftime(self.ts_format)
return content.strftime(self.ts_format)
except ValueError:
return content

Expand Down
Loading

0 comments on commit 6329c3f

Please sign in to comment.