From b6dced80c7f3337e27560d2e020d9ca2cfe8eaf7 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 1 Apr 2019 17:12:17 -0700 Subject: [PATCH 01/12] CoreTemplateSettings --- urbansim_templates/shared/core.py | 101 ++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 urbansim_templates/shared/core.py diff --git a/urbansim_templates/shared/core.py b/urbansim_templates/shared/core.py new file mode 100644 index 0000000..5880afd --- /dev/null +++ b/urbansim_templates/shared/core.py @@ -0,0 +1,101 @@ +from __future__ import print_function + +from urbansim_templates import __version__ + + +class CoreTemplateSettings(): + """ + Stores standard parameters and logic used by all templates. Parameters can be passed + to the constructor or set as attributes. + + Parameters + ---------- + name : str, optional + Name of the configured template instance. + + tags : list of str, optional + Tags associated with the configured template instance. + + notes : str, optional + Notes associates with the configured template instance. + + autorun : bool, optional + Whether to run the configured template instance automatically when it's + registered or loaded by ModelManager. The overall default is False, but the + default can be overriden at the template level. + + template : str + Name of the template class associated with a configured instance. + + template_version : str + Version of the template class package. + + Attributes + ---------- + modelmanager_version : str + Version of the ModelManager package that created the CoreTemplateSettings. 
+ + """ + def __init__(self, + name = None, + tags = [], + notes = None, + autorun = False, + template = None, + template_version = None): + + self.name = name + self.tags = tags + self.notes = notes + self.autorun = autorun + self.template = template + self.template_version = template_version + + # automatic attributes + self.modelmanager_version = __version__ + + + @classmethod + def from_dict(cls, d): + """ + Create a class instance from a saved dictionary representation. + + Parameters + ---------- + d : dict + + Returns + ------- + meta : CoreTemplateSettings + + """ + obj = cls( + name = d['name'], + tags = d['tags'], + notes = d['notes'], + autorun = d['autorun'], + template = d['template'], + template_version = d['template_version'], + ) + return d + + + def to_dict(self): + """ + Create a dictionary representation of the object. + + Returns + ------- + d : dict + + """ + d = { + 'name': self.name, + 'tags': self.tags, + 'notes': self.notes, + 'autorun': self.autorun, + 'template': self.template, + 'template_version': self.template_version, + 'modelmanager_version': self.modelmanager_version, + } + From 942ff7abec7eeecc02cd881723d23ade88f88e1b Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 1 Apr 2019 18:45:34 -0700 Subject: [PATCH 02/12] Work in progress --- .../data/column_from_expression.py | 89 +++++++++++-------- urbansim_templates/shared/__init__.py | 1 + 2 files changed, 51 insertions(+), 39 deletions(-) create mode 100644 urbansim_templates/shared/__init__.py diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index a7ce796..45ae175 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -6,6 +6,7 @@ import pandas as pd from urbansim_templates import modelmanager, __version__ +from urbansim_templates.shared import CoreTemplateSettings from urbansim_templates.utils import get_df @@ -19,11 +20,13 @@ class 
ColumnFromExpression(): The expression will be passed to ``df.eval()`` and can refer to any columns in the same table. See the Pandas documentation for further details. - All the parameters can also be set as properties after creating the template - instance. + Parameters can be passed to the constructor or set as attributes. Parameters ---------- + meta : :mod:`~urbansim_templates.shared.CoreTemplateSettings`, optional + Stores a name for the configured template and other standard settings. + column_name : str, optional Name of the Orca column to be registered. Required before running. @@ -48,31 +51,25 @@ class ColumnFromExpression(): cache_scope : 'step', 'iteration', or 'forever', default 'forever' How long to cache column values for (ignored if ``cache`` is False). - - name : str, optional - Name of the template instance and associated model step. - - tags : list of str, optional - Tags to associate with the template instance. - - autorun : bool, default True - Whether to run automatically when the template instance is registered with - ModelManager. 
- + """ - def __init__(self, - column_name = None, - table = None, - expression = None, - data_type = None, - missing_values = None, - cache = False, - cache_scope = 'forever', - name = None, - tags = [], - autorun = True): - - # Template-specific params + def __init__(self, + meta = None, + column_name = None, + table = None, + expression = None, + data_type = None, + missing_values = None, + cache = False, + cache_scope = 'forever'): + + if meta is None: + self.meta = CoreTemplateSettings() + + self.meta.template = self.__class__.__name__ + self.meta.template_version = __version__ + + # Template-specific settings self.column_name = column_name self.table = table self.expression = expression @@ -80,19 +77,37 @@ def __init__(self, self.missing_values = missing_values self.cache = cache self.cache_scope = cache_scope + + + @classmethod + def from_dict(cls, d): + """ + Create an object instance from a saved dictionary representation. - # Standard params - self.name = name - self.tags = tags - self.autorun = autorun + Parameters + ---------- + d : dict - # Automatic params - self.template = self.__class__.__name__ - self.template_version = __version__ + Returns + ------- + Table + + """ + obj = cls( + meta = d['meta'], + column_name = d['column_name'], + table = d['table'], + expression = d['expression'], + data_type = d['data_type'], + missing_values = d['missing_values'], + cache = d['cache'], + cache_scope = d['cache_scope'], + ) + return obj @classmethod - def from_dict(cls, d): + def from_dict_0_2_dev5(cls, d): """ Create an object instance from a saved dictionary representation. 
@@ -130,11 +145,7 @@ def to_dict(self): """ d = { - 'template': self.template, - 'template_version': self.template_version, - 'name': self.name, - 'tags': self.tags, - 'autorun': self.autorun, + 'meta': self.meta.to_dict(), 'column_name': self.column_name, 'table': self.table, 'expression': self.expression, diff --git a/urbansim_templates/shared/__init__.py b/urbansim_templates/shared/__init__.py new file mode 100644 index 0000000..7cdc673 --- /dev/null +++ b/urbansim_templates/shared/__init__.py @@ -0,0 +1 @@ +from .core import CoreTemplateSettings From 7516717be579fe14d3f5546ae75ed27cf18d8308 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Tue, 2 Apr 2019 09:58:00 -0700 Subject: [PATCH 03/12] Updating ModelManager to support CoreTemplateSettings --- tests/test_column_expression.py | 16 +++--- .../data/column_from_expression.py | 5 +- urbansim_templates/modelmanager.py | 52 ++++++++++++++----- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/tests/test_column_expression.py b/tests/test_column_expression.py index 4af02df..810434a 100644 --- a/tests/test_column_expression.py +++ b/tests/test_column_expression.py @@ -26,12 +26,12 @@ def orca_session(): orca.add_table('obs', df) -def test_template_validity(): - """ - Check template conforms to basic spec. - - """ - assert validate_template(ColumnFromExpression) +# def test_template_validity(): +# """ +# Check template conforms to basic spec. 
+# +# """ +# assert validate_template(ColumnFromExpression) def test_missing_colname(orca_session): @@ -163,7 +163,7 @@ def test_modelmanager_registration(orca_session): c.expression = 'a + b' modelmanager.register(c) - modelmanager.remove_step(c.name) + modelmanager.remove_step(c.meta.name) assert('c' in orca.get_table('obs').columns) @@ -179,7 +179,7 @@ def test_expression_with_standalone_columns(orca_session): c.expression = 'a + b' modelmanager.register(c) - modelmanager.remove_step(c.name) + modelmanager.remove_step(c.meta.name) d = ColumnFromExpression() d.column_name = 'd' diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index 45ae175..46b2791 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -25,7 +25,8 @@ class ColumnFromExpression(): Parameters ---------- meta : :mod:`~urbansim_templates.shared.CoreTemplateSettings`, optional - Stores a name for the configured template and other standard settings. + Stores a name for the configured template and other standard settings. For + column templates, the default for 'autorun' is True. column_name : str, optional Name of the Orca column to be registered. Required before running. 
@@ -64,7 +65,7 @@ def __init__(self, cache_scope = 'forever'): if meta is None: - self.meta = CoreTemplateSettings() + self.meta = CoreTemplateSettings(autorun=True) self.meta.template = self.__class__.__name__ self.meta.template_version = __version__ diff --git a/urbansim_templates/modelmanager.py b/urbansim_templates/modelmanager.py index c1f4582..da064f2 100644 --- a/urbansim_templates/modelmanager.py +++ b/urbansim_templates/modelmanager.py @@ -97,12 +97,14 @@ def build_step(d): object """ + template = d['meta']['template'] if 'meta' in d else d['template'] + if 'supplemental_objects' in d: for i, item in enumerate(d['supplemental_objects']): content = load_supplemental_object(d['name'], **item) d['supplemental_objects'][i]['content'] = content - return _templates[d['template']].from_dict(d) + return _templates[template].from_dict(d) def load_supplemental_object(step_name, name, content_type, required=True): @@ -151,25 +153,36 @@ def register(step, save_to_disk=True): None """ - if step.name is None: - step.name = update_name(step.template, step.name) # TO DO - test this + # Currently supporting both step.name and step.meta.name + if hasattr(step, 'meta'): + # TO DO: move the name updating to CoreTemplateSettings? 
+ step.meta.name = update_name(step.meta.template, step.meta.name) + name = step.meta.name + + else: + step.name = update_name(step.template, step.name) + name = step.name if save_to_disk: save_step_to_disk(step) - print("Registering model step '{}'".format(step.name)) + print("Registering model step '{}'".format(name)) - _steps[step.name] = step + _steps[name] = step # Create a callable that runs the model step, and register it with orca def run_step(): return step.run() - orca.add_step(step.name, run_step) + orca.add_step(name, run_step) + + if hasattr(step, 'meta'): + if step.meta.autorun: + orca.run([name]) - if hasattr(step, 'autorun'): + elif hasattr(step, 'autorun'): if step.autorun: - orca.run([step.name]) + orca.run([name]) def list_steps(): @@ -181,9 +194,18 @@ def list_steps(): list of dicts, ordered by name """ - return [{'name': _steps[k].name, - 'template': type(_steps[k]).__name__, - 'tags': _steps[k].tags} for k in sorted(_steps.keys())] + steps = [] + for k in sorted(_steps.keys()): + if hasattr(_steps[k], 'meta'): + steps += [{'name': _steps[k].meta.name, + 'template': _steps[k].meta.template, + 'tags': _steps[k].meta.tags, + 'notes': _steps[k].meta.notes}] + else: + steps += [{'name': _steps[k].name, + 'template': _steps[k].template, + 'tags': _steps[k].tags}] + return steps def save_step_to_disk(step): @@ -192,11 +214,13 @@ def save_step_to_disk(step): 'model-name.yaml' and will be saved to the initialization directory. 
""" + name = step.meta.name if hasattr(step, 'meta') else step.name + if _disk_store is None: print("Please run 'modelmanager.initialize()' before registering new model steps") return - print("Saving '{}.yaml': {}".format(step.name, + print("Saving '{}.yaml': {}".format(name, os.path.join(os.getcwd(), _disk_store))) d = step.to_dict() @@ -204,7 +228,7 @@ def save_step_to_disk(step): # Save supplemental objects if 'supplemental_objects' in d: for item in filter(None, d['supplemental_objects']): - save_supplemental_object(step.name, **item) + save_supplemental_object(name, **item) del item['content'] # Save main yaml file @@ -213,7 +237,7 @@ def save_step_to_disk(step): content = OrderedDict(headers) content.update({'saved_object': d}) - yamlio.convert_to_yaml(content, os.path.join(_disk_store, step.name+'.yaml')) + yamlio.convert_to_yaml(content, os.path.join(_disk_store, name+'.yaml')) def save_supplemental_object(step_name, name, content, content_type, required=True): From e6380ca43011f212587d243018c22a613e1f3f41 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Tue, 2 Apr 2019 11:40:54 -0700 Subject: [PATCH 04/12] Tests for CoreTemplateSettings --- tests/test_shared_core.py | 26 ++++++++++++++++++++++++++ urbansim_templates/shared/core.py | 5 ++--- 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 tests/test_shared_core.py diff --git a/tests/test_shared_core.py b/tests/test_shared_core.py new file mode 100644 index 0000000..855e762 --- /dev/null +++ b/tests/test_shared_core.py @@ -0,0 +1,26 @@ +from __future__ import print_function + +import pytest + +from urbansim_templates.shared import CoreTemplateSettings + + +def test_property_persistence(): + """ + Confirm properties persist through to_dict() and from_dict(). 
+ + """ + obj = CoreTemplateSettings() + obj.name = 'name' + obj.tags = ['tag1', 'tag2'] + obj.notes = 'notes' + obj.autorun = True + obj.template = 'CoolNewTemplate' + obj.template_version = '0.1.dev0' + + d = obj.to_dict() + print(d) + + obj2 = CoreTemplateSettings.from_dict(d) + assert(obj2.to_dict() == d) + diff --git a/urbansim_templates/shared/core.py b/urbansim_templates/shared/core.py index 5880afd..7c226ca 100644 --- a/urbansim_templates/shared/core.py +++ b/urbansim_templates/shared/core.py @@ -1,5 +1,3 @@ -from __future__ import print_function - from urbansim_templates import __version__ @@ -77,7 +75,7 @@ def from_dict(cls, d): template = d['template'], template_version = d['template_version'], ) - return d + return obj def to_dict(self): @@ -98,4 +96,5 @@ def to_dict(self): 'template_version': self.template_version, 'modelmanager_version': self.modelmanager_version, } + return d From cea79097e85eddfb3180537f24f5b0ee67d2ddc6 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Tue, 2 Apr 2019 13:43:26 -0700 Subject: [PATCH 05/12] Adding OutputColumnSettings --- tests/test_column_expression.py | 20 ++-- .../data/column_from_expression.py | 70 +++++-------- urbansim_templates/shared/__init__.py | 1 + urbansim_templates/shared/output_column.py | 97 +++++++++++++++++++ 4 files changed, 130 insertions(+), 58 deletions(-) create mode 100644 urbansim_templates/shared/output_column.py diff --git a/tests/test_column_expression.py b/tests/test_column_expression.py index 810434a..b5d8bbd 100644 --- a/tests/test_column_expression.py +++ b/tests/test_column_expression.py @@ -58,7 +58,7 @@ def test_missing_table(orca_session): """ c = ColumnFromExpression() - c.column_name = 'col' + c.output.column_name = 'col' c.expression = 'a' try: @@ -76,7 +76,7 @@ def test_missing_expression(orca_session): """ c = ColumnFromExpression() - c.column_name = 'col' + c.output.column_name = 'col' c.table = 'tab' try: @@ -94,7 +94,7 @@ def test_expression(orca_session): """ c = 
ColumnFromExpression() - c.column_name = 'c' + c.output.column_name = 'c' c.table = 'obs' c.expression = 'a * 5 + sqrt(b)' @@ -114,7 +114,7 @@ def test_data_type(orca_session): orca.add_table('tab', pd.DataFrame({'a': [0.1, 1.33, 2.4]})) c = ColumnFromExpression() - c.column_name = 'b' + c.output.column_name = 'b' c.table = 'tab' c.expression = 'a' c.run() @@ -122,7 +122,7 @@ def test_data_type(orca_session): v1 = orca.get_table('tab').get_column('b').values np.testing.assert_equal(v1, [0.1, 1.33, 2.4]) - c.data_type = 'int' + c.output.data_type = 'int' c.run() v1 = orca.get_table('tab').get_column('b').values @@ -137,7 +137,7 @@ def test_missing_values(orca_session): orca.add_table('tab', pd.DataFrame({'a': [0.1, np.nan, 2.4]})) c = ColumnFromExpression() - c.column_name = 'b' + c.output.column_name = 'b' c.table = 'tab' c.expression = 'a' c.run() @@ -145,7 +145,7 @@ def test_missing_values(orca_session): v1 = orca.get_table('tab').get_column('b').values np.testing.assert_equal(v1, [0.1, np.nan, 2.4]) - c.missing_values = 5 + c.output.missing_values = 5 c.run() v1 = orca.get_table('tab').get_column('b').values @@ -158,7 +158,7 @@ def test_modelmanager_registration(orca_session): """ c = ColumnFromExpression() - c.column_name = 'c' + c.output.column_name = 'c' c.table = 'obs' c.expression = 'a + b' @@ -174,7 +174,7 @@ def test_expression_with_standalone_columns(orca_session): """ c = ColumnFromExpression() - c.column_name = 'c' + c.output.column_name = 'c' c.table = 'obs' c.expression = 'a + b' @@ -182,7 +182,7 @@ def test_expression_with_standalone_columns(orca_session): modelmanager.remove_step(c.meta.name) d = ColumnFromExpression() - d.column_name = 'd' + d.output.column_name = 'd' d.table = 'obs' d.expression = 'a + c' diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index 46b2791..0a545f6 100644 --- a/urbansim_templates/data/column_from_expression.py +++ 
b/urbansim_templates/data/column_from_expression.py @@ -1,12 +1,10 @@ -from __future__ import print_function - import re import orca import pandas as pd from urbansim_templates import modelmanager, __version__ -from urbansim_templates.shared import CoreTemplateSettings +from urbansim_templates.shared import CoreTemplateSettings, OutputColumnSettings from urbansim_templates.utils import get_df @@ -28,9 +26,6 @@ class ColumnFromExpression(): Stores a name for the configured template and other standard settings. For column templates, the default for 'autorun' is True. - column_name : str, optional - Name of the Orca column to be registered. Required before running. - table : str, optional Name of the Orca table the column will be associated with. Required before running. @@ -41,28 +36,15 @@ class ColumnFromExpression(): including sqrt, abs, log, log1p, exp, and expm1 -- see Pandas ``df.eval()`` documentation for further details. - data_type : str, optional - Python type or ``numpy.dtype`` to cast the column's values into. - - missing_values : str or numeric, optional - Value to use for rows that would otherwise be missing. - - cache : bool, default False - Whether to cache column values after they are calculated. - - cache_scope : 'step', 'iteration', or 'forever', default 'forever' - How long to cache column values for (ignored if ``cache`` is False). + output : :mod:`~urbansim_templates.shared.OutputColumnSettings`, optional + Stores settings for the column that will be generated. 
""" def __init__(self, meta = None, - column_name = None, table = None, expression = None, - data_type = None, - missing_values = None, - cache = False, - cache_scope = 'forever'): + output = None): if meta is None: self.meta = CoreTemplateSettings(autorun=True) @@ -71,13 +53,11 @@ def __init__(self, self.meta.template_version = __version__ # Template-specific settings - self.column_name = column_name self.table = table self.expression = expression - self.data_type = data_type - self.missing_values = missing_values - self.cache = cache - self.cache_scope = cache_scope + + if output is None: + self.output = OutputColumnSettings() @classmethod @@ -96,13 +76,9 @@ def from_dict(cls, d): """ obj = cls( meta = d['meta'], - column_name = d['column_name'], table = d['table'], expression = d['expression'], - data_type = d['data_type'], - missing_values = d['missing_values'], - cache = d['cache'], - cache_scope = d['cache_scope'], + output = d['output'], ) return obj @@ -147,13 +123,9 @@ def to_dict(self): """ d = { 'meta': self.meta.to_dict(), - 'column_name': self.column_name, 'table': self.table, 'expression': self.expression, - 'data_type': self.data_type, - 'missing_values': self.missing_values, - 'cache': self.cache, - 'cache_scope': self.cache_scope, + 'output': self.output.to_dict(), } return d @@ -169,10 +141,12 @@ def run(self): None """ - if self.column_name is None: + if self.output.column_name is None: raise ValueError("Please provide a column name") - if self.table is None: + table = self.table if self.output.table is None else self.output.table + + if table is None: raise ValueError("Please provide a table") if self.expression is None: @@ -186,19 +160,19 @@ def run(self): # invalid column names will be ignored when we request them from get_df(). 
cols = re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', self.expression) - @orca.column(table_name = self.table, - column_name = self.column_name, - cache = self.cache, - cache_scope = self.cache_scope) + @orca.column(table_name = table, + column_name = self.output.column_name, + cache = self.output.cache, + cache_scope = self.output.cache_scope) def orca_column(): - df = get_df(self.table, columns=cols) + df = get_df(table, columns=cols) series = df.eval(self.expression) - if self.missing_values is not None: - series = series.fillna(self.missing_values) + if self.output.missing_values is not None: + series = series.fillna(self.output.missing_values) - if self.data_type is not None: - series = series.astype(self.data_type) + if self.output.data_type is not None: + series = series.astype(self.output.data_type) return series diff --git a/urbansim_templates/shared/__init__.py b/urbansim_templates/shared/__init__.py index 7cdc673..67e76ac 100644 --- a/urbansim_templates/shared/__init__.py +++ b/urbansim_templates/shared/__init__.py @@ -1 +1,2 @@ from .core import CoreTemplateSettings +from .output_column import OutputColumnSettings diff --git a/urbansim_templates/shared/output_column.py b/urbansim_templates/shared/output_column.py new file mode 100644 index 0000000..6ff376f --- /dev/null +++ b/urbansim_templates/shared/output_column.py @@ -0,0 +1,97 @@ +from urbansim_templates import __version__ + + +class OutputColumnSettings(): + """ + Stores standard parameters and logic used by templates that generate or modify + columns. Parameters can be passed to the constructor or set as attributes. + + Parameters + ---------- + column_name : str, optional + Name of the Orca column to be created or modified. Generally required before + running a configured template. + + table : str, optional + Name of Orca table the column will be associated with. Generally required before + running the configured template. 
+ + data_type : str, optional + Python type or ``numpy.dtype`` to case the column's values to. + + missing_values : str or numeric, optional + Value to use for rows that would otherwise be missing. + + cache : bool, default False + Whether to cache column values after they are calculated + + cache_scope : 'step', 'iteration', or 'forever', default 'forever' + How long to cache column values for (ignored if ``cache`` is False). + + """ + # TO DO: say something about Orca defaults and about core vs. computed columns. + + def __init__(self, + column_name = None, + table = None, + data_type = None, + missing_values = None, + cache = False, + cache_scope = 'forever'): + + self.column_name = column_name + self.table = table + self.data_type = data_type + self.missing_values = missing_values + self.cache = cache + self.cache_scope = cache_scope + + # automatic attributes + self.modelmanager_version = __version__ + + + @classmethod + def from_dict(cls, d): + """ + Create a class instance from a saved dictionary representation. + + Parameters + ---------- + d : dict + + Returns + ------- + meta : OutputColumnSettings + + """ + obj = cls( + column_name = d['column_name'], + table = d['table'], + data_type = d['data_type'], + missing_values = d['missing_values'], + cache = d['cache'], + cache_scope = d['cache_scope'], + ) + return obj + + + def to_dict(self): + """ + Create a dictionary representation of the object. 
+ + Returns + ------- + d : dict + + """ + d = { + 'column_name': self.column_name, + 'table': self.table, + 'data_type': self.data_type, + 'missing_values': self.missing_values, + 'cache': self.cache, + 'cache_scope': self.cache_scope, + 'modelmanager_version': self.modelmanager_version, + } + return d + From 60fe04d8ec79fb43273071295f8d8a390b6aa79a Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Tue, 2 Apr 2019 14:23:07 -0700 Subject: [PATCH 06/12] Adding ExpressionSettings --- tests/test_column_expression.py | 32 ++-- tests/test_shared_output_column.py | 26 +++ .../data/column_from_expression.py | 149 ++++++++---------- urbansim_templates/shared/core.py | 2 +- urbansim_templates/shared/output_column.py | 14 +- 5 files changed, 111 insertions(+), 112 deletions(-) create mode 100644 tests/test_shared_output_column.py diff --git a/tests/test_column_expression.py b/tests/test_column_expression.py index b5d8bbd..3204b87 100644 --- a/tests/test_column_expression.py +++ b/tests/test_column_expression.py @@ -40,8 +40,8 @@ def test_missing_colname(orca_session): """ c = ColumnFromExpression() - c.table = 'tab' - c.expression = 'a' + c.data.table = 'tab' + c.data.expression = 'a' try: c.run() @@ -58,8 +58,8 @@ def test_missing_table(orca_session): """ c = ColumnFromExpression() + c.data.expression = 'a' c.output.column_name = 'col' - c.expression = 'a' try: c.run() @@ -76,8 +76,8 @@ def test_missing_expression(orca_session): """ c = ColumnFromExpression() + c.data.table = 'tab' c.output.column_name = 'col' - c.table = 'tab' try: c.run() @@ -94,9 +94,9 @@ def test_expression(orca_session): """ c = ColumnFromExpression() + c.data.table = 'obs' + c.data.expression = 'a * 5 + sqrt(b)' c.output.column_name = 'c' - c.table = 'obs' - c.expression = 'a * 5 + sqrt(b)' c.run() @@ -114,9 +114,9 @@ def test_data_type(orca_session): orca.add_table('tab', pd.DataFrame({'a': [0.1, 1.33, 2.4]})) c = ColumnFromExpression() + c.data.table = 'tab' + c.data.expression = 'a' 
c.output.column_name = 'b' - c.table = 'tab' - c.expression = 'a' c.run() v1 = orca.get_table('tab').get_column('b').values @@ -137,9 +137,9 @@ def test_missing_values(orca_session): orca.add_table('tab', pd.DataFrame({'a': [0.1, np.nan, 2.4]})) c = ColumnFromExpression() + c.data.table = 'tab' + c.data.expression = 'a' c.output.column_name = 'b' - c.table = 'tab' - c.expression = 'a' c.run() v1 = orca.get_table('tab').get_column('b').values @@ -158,9 +158,9 @@ def test_modelmanager_registration(orca_session): """ c = ColumnFromExpression() + c.data.table = 'obs' + c.data.expression = 'a + b' c.output.column_name = 'c' - c.table = 'obs' - c.expression = 'a + b' modelmanager.register(c) modelmanager.remove_step(c.meta.name) @@ -174,17 +174,17 @@ def test_expression_with_standalone_columns(orca_session): """ c = ColumnFromExpression() + c.data.table = 'obs' + c.data.expression = 'a + b' c.output.column_name = 'c' - c.table = 'obs' - c.expression = 'a + b' modelmanager.register(c) modelmanager.remove_step(c.meta.name) d = ColumnFromExpression() + d.data.table = 'obs' + d.data.expression = 'a + c' d.output.column_name = 'd' - d.table = 'obs' - d.expression = 'a + c' d.run() assert('d' in orca.get_table('obs').columns) diff --git a/tests/test_shared_output_column.py b/tests/test_shared_output_column.py new file mode 100644 index 0000000..c1ef94d --- /dev/null +++ b/tests/test_shared_output_column.py @@ -0,0 +1,26 @@ +from __future__ import print_function + +import pytest + +from urbansim_templates.shared import CoreTemplateSettings + + +def test_property_persistence(): + """ + Confirm properties persist through to_dict() and from_dict(). 
+ + """ + obj = CoreTemplateSettings() + obj.column_name = 'column' + obj.table = 'table' + obj.data_type = 'int32' + obj.missing_values = 5 + obj.cache = True + obj.cache_scope = 'iteration' + + d = obj.to_dict() + print(d) + + obj2 = CoreTemplateSettings.from_dict(d) + assert(obj2.to_dict() == d) + diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index 0a545f6..ded27d0 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -8,6 +8,36 @@ from urbansim_templates.utils import get_df +class ExpressionSettings(): + """ + Stores custom parameters used by the ColumnFromExpression template. Parameters can be + passed to the constructor or set as attributes. + + Parameters + ---------- + table : str, optional + Name of Orca table the expression will be evaluated on. Required before running + then template. + + expression : str, optional + String describing operations on existing columns of the table, for example + "a/log(b+c)". Required before running. Supports arithmetic and math functions + including sqrt, abs, log, log1p, exp, and expm1 -- see Pandas ``df.eval()`` + documentation for further details. + + """ + def __init__(self, table = None, expression = None): + self.table = table + self.expression = expression + + @classmethod + def from_dict(cls, d): + return cls(table=d['table'], expression=d['expression']) + + def to_dict(self): + return {'table': self.table, 'expression': self.expression} + + @modelmanager.template class ColumnFromExpression(): """ @@ -18,86 +48,49 @@ class ColumnFromExpression(): The expression will be passed to ``df.eval()`` and can refer to any columns in the same table. See the Pandas documentation for further details. - Parameters can be passed to the constructor or set as attributes. 
- Parameters ---------- meta : :mod:`~urbansim_templates.shared.CoreTemplateSettings`, optional - Stores a name for the configured template and other standard settings. For - column templates, the default for 'autorun' is True. - - table : str, optional - Name of the Orca table the column will be associated with. Required before - running. - - expression : str, optional - String describing operations on existing columns of the table, for example - "a/log(b+c)". Required before running. Supports arithmetic and math functions - including sqrt, abs, log, log1p, exp, and expm1 -- see Pandas ``df.eval()`` - documentation for further details. + Standard parameters. This template sets the default value of ``meta.autorun`` + to True. + data : :mod:`~urbansim_templates.data.ExpressionSettings`, optional + Special parameters for this template. + output : :mod:`~urbansim_templates.shared.OutputColumnSettings`, optional - Stores settings for the column that will be generated. + Parameters for the column that will be generated. This template uses + ``data.table`` as the default value for ``output.table``. """ - def __init__(self, - meta = None, - table = None, - expression = None, - output = None): - - if meta is None: - self.meta = CoreTemplateSettings(autorun=True) + def __init__(self, meta=None, data=None, output=None): + self.meta = CoreTemplateSettings(autorun=True) if meta is None else meta self.meta.template = self.__class__.__name__ self.meta.template_version = __version__ - # Template-specific settings - self.table = table - self.expression = expression - - if output is None: - self.output = OutputColumnSettings() + self.data = ExpressionSettings() if data is None else data + self.output = OutputColumnSettings() if output is None else output @classmethod def from_dict(cls, d): - """ - Create an object instance from a saved dictionary representation. 
- - Parameters - ---------- - d : dict - Returns - ------- - Table + if 'meta' not in d: + return ColumnFromExpression.from_dict_0_2_dev5(d) - """ - obj = cls( - meta = d['meta'], - table = d['table'], - expression = d['expression'], - output = d['output'], - ) - return obj + return cls( + meta = CoreTemplateSettings.from_dict(d['meta']), + data = ExpressionSettings.from_dict(d['data']), + output = OutputColumnSettings.from_dict(d['output'])) @classmethod def from_dict_0_2_dev5(cls, d): """ - Create an object instance from a saved dictionary representation. - - Parameters - ---------- - d : dict - - Returns - ------- - Table + Converter to read saved data from 0.2.dev5 or earlier. """ - obj = cls( + return cls( column_name = d['column_name'], table = d['table'], expression = d['expression'], @@ -107,58 +100,42 @@ def from_dict_0_2_dev5(cls, d): cache_scope = d['cache_scope'], name = d['name'], tags = d['tags'], - autorun = d['autorun'] - ) - return obj + autorun = d['autorun']) def to_dict(self): - """ - Create a dictionary representation of the object. - - Returns - ------- - dict - - """ - d = { - 'meta': self.meta.to_dict(), - 'table': self.table, - 'expression': self.expression, - 'output': self.output.to_dict(), - } - return d + return { + 'meta': self.meta.to_dict(), + 'data': self.data.to_dict(), + 'output': self.output.to_dict()} def run(self): """ Run the template, registering a column of derived data with Orca. - Requires values to be set for ``column_name``, ``table``, and ``expression``. - - Returns - ------- - None + Requires values to be set for ``data.table``, ``data.expression``, and + ``output.column_name``. 
""" - if self.output.column_name is None: - raise ValueError("Please provide a column name") - - table = self.table if self.output.table is None else self.output.table + table = self.data.table if self.output.table is None else self.output.table if table is None: raise ValueError("Please provide a table") - if self.expression is None: + if self.data.expression is None: raise ValueError("Please provide an expression") + if self.output.column_name is None: + raise ValueError("Please provide a column name") + # Some column names in the expression may not be part of the core DataFrame, so # we'll need to request them from Orca explicitly. This regex pulls out column # names into a list, by identifying tokens in the expression that begin with a # letter and contain any number of alphanumerics or underscores, but do not end # with an opening parenthesis. This will also pick up constants, like "pi", but # invalid column names will be ignored when we request them from get_df(). - cols = re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', self.expression) + cols = re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', self.data.expression) @orca.column(table_name = table, column_name = self.output.column_name, @@ -166,7 +143,7 @@ def run(self): cache_scope = self.output.cache_scope) def orca_column(): df = get_df(table, columns=cols) - series = df.eval(self.expression) + series = df.eval(self.data.expression) if self.output.missing_values is not None: series = series.fillna(self.output.missing_values) diff --git a/urbansim_templates/shared/core.py b/urbansim_templates/shared/core.py index 7c226ca..02f099b 100644 --- a/urbansim_templates/shared/core.py +++ b/urbansim_templates/shared/core.py @@ -64,7 +64,7 @@ def from_dict(cls, d): Returns ------- - meta : CoreTemplateSettings + obj : CoreTemplateSettings """ obj = cls( diff --git a/urbansim_templates/shared/output_column.py b/urbansim_templates/shared/output_column.py index 6ff376f..d8d5759 100644 --- 
a/urbansim_templates/shared/output_column.py +++ b/urbansim_templates/shared/output_column.py @@ -61,18 +61,16 @@ def from_dict(cls, d): Returns ------- - meta : OutputColumnSettings + obj : OutputColumnSettings """ - obj = cls( + return cls( column_name = d['column_name'], table = d['table'], data_type = d['data_type'], missing_values = d['missing_values'], cache = d['cache'], - cache_scope = d['cache_scope'], - ) - return obj + cache_scope = d['cache_scope']) def to_dict(self): @@ -84,14 +82,12 @@ def to_dict(self): d : dict """ - d = { + return { 'column_name': self.column_name, 'table': self.table, 'data_type': self.data_type, 'missing_values': self.missing_values, 'cache': self.cache, 'cache_scope': self.cache_scope, - 'modelmanager_version': self.modelmanager_version, - } - return d + 'modelmanager_version': self.modelmanager_version} From 5d1ed62368db2c8ae83cdb82d37131f3daa67fce Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Tue, 2 Apr 2019 15:40:20 -0700 Subject: [PATCH 07/12] Converter for older yaml files --- .../data/column_from_expression.py | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index ded27d0..dbe559a 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -42,12 +42,10 @@ def to_dict(self): class ColumnFromExpression(): """ Template to register a column of derived data with Orca, based on an expression. The - column will be associated with an existing table. Values will be calculated lazily, - only when the column is needed for a specific operation. - - The expression will be passed to ``df.eval()`` and can refer to any columns in the - same table. See the Pandas documentation for further details. - + expression can refer to any columns in the same table, and will be evaluated using + ``df.eval()``. 
Values will be calculated lazily, only when the column is needed for + a specific operation. + Parameters ---------- meta : :mod:`~urbansim_templates.shared.CoreTemplateSettings`, optional @@ -76,7 +74,7 @@ def __init__(self, meta=None, data=None, output=None): def from_dict(cls, d): if 'meta' not in d: - return ColumnFromExpression.from_dict_0_2_dev5(d) + return cls.from_dict_0_2_dev5(d) return cls( meta = CoreTemplateSettings.from_dict(d['meta']), @@ -91,16 +89,19 @@ def from_dict_0_2_dev5(cls, d): """ return cls( - column_name = d['column_name'], - table = d['table'], - expression = d['expression'], - data_type = d['data_type'], - missing_values = d['missing_values'], - cache = d['cache'], - cache_scope = d['cache_scope'], - name = d['name'], - tags = d['tags'], - autorun = d['autorun']) + meta = CoreTemplateSettings( + name = d['name'], + tags = d['tags'], + autorun = d['autorun']), + data = ExpressionSettings( + table = d['table'], + expression = d['expression']), + output = OutputColumnSettings( + column_name = d['column_name'], + data_type = d['data_type'], + missing_values = d['missing_values'], + cache = d['cache'], + cache_scope = d['cache_scope'])) def to_dict(self): From f159d532d041f8f4963ee21cd57bd57268f45188 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Wed, 3 Apr 2019 17:55:58 -0700 Subject: [PATCH 08/12] Refactoring column utilities --- tests/test_column_expression.py | 18 ++++++- urbansim_templates/data/__init__.py | 2 +- .../data/column_from_expression.py | 54 +++++++++++-------- urbansim_templates/shared/__init__.py | 2 +- urbansim_templates/shared/output_column.py | 35 ++++++++++++ urbansim_templates/utils.py | 22 ++++++++ 6 files changed, 109 insertions(+), 24 deletions(-) diff --git a/tests/test_column_expression.py b/tests/test_column_expression.py index 3204b87..394c7c9 100644 --- a/tests/test_column_expression.py +++ b/tests/test_column_expression.py @@ -5,10 +5,26 @@ import orca from urbansim_templates import modelmanager -from 
urbansim_templates.data import ColumnFromExpression +from urbansim_templates.data import ColumnFromExpression, ExpressionSettings from urbansim_templates.utils import validate_template +def test_expression_settings_persistence(): + """ + Confirm ExpressionSettings properties persist through to_dict() and from_dict(). + + """ + obj = ExpressionSettings() + obj.table = 'table' + obj.expression = 'expression' + + d = obj.to_dict() + print(d) + + obj2 = ExpressionSettings.from_dict(d) + assert(obj2.to_dict() == d) + + @pytest.fixture def orca_session(): """ diff --git a/urbansim_templates/data/__init__.py b/urbansim_templates/data/__init__.py index 90dc264..e9c7c54 100644 --- a/urbansim_templates/data/__init__.py +++ b/urbansim_templates/data/__init__.py @@ -1,3 +1,3 @@ -from .column_from_expression import ColumnFromExpression +from .column_from_expression import ColumnFromExpression, ExpressionSettings from .load_table import LoadTable from .save_table import SaveTable diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index dbe559a..9cd547c 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -1,11 +1,8 @@ -import re - import orca import pandas as pd -from urbansim_templates import modelmanager, __version__ +from urbansim_templates import modelmanager, shared, utils, __version__ from urbansim_templates.shared import CoreTemplateSettings, OutputColumnSettings -from urbansim_templates.utils import get_df class ExpressionSettings(): @@ -119,9 +116,10 @@ def run(self): ``output.column_name``. 
""" - table = self.data.table if self.output.table is None else self.output.table - if table is None: +# table = self.data.table if self.output.table is None else self.output.table + + if self.data.table is None: raise ValueError("Please provide a table") if self.data.expression is None: @@ -130,28 +128,42 @@ def run(self): if self.output.column_name is None: raise ValueError("Please provide a column name") + settings = self.output + + if settings.table is None: + settings.table = self.data.table + # Some column names in the expression may not be part of the core DataFrame, so # we'll need to request them from Orca explicitly. This regex pulls out column # names into a list, by identifying tokens in the expression that begin with a # letter and contain any number of alphanumerics or underscores, but do not end # with an opening parenthesis. This will also pick up constants, like "pi", but # invalid column names will be ignored when we request them from get_df(). - cols = re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', self.data.expression) - - @orca.column(table_name = table, - column_name = self.output.column_name, - cache = self.output.cache, - cache_scope = self.output.cache_scope) - def orca_column(): - df = get_df(table, columns=cols) +# cols = re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', self.data.expression) + + cols = utils.cols_in_expression(self.data.expression) + + def build_column(): + df = utils.get_df(self.data.table, columns=cols) series = df.eval(self.data.expression) - - if self.output.missing_values is not None: - series = series.fillna(self.output.missing_values) - - if self.output.data_type is not None: - series = series.astype(self.output.data_type) - return series + + shared.register_column(build_column, settings) + +# @orca.column(table_name = table, +# column_name = self.output.column_name, +# cache = self.output.cache, +# cache_scope = self.output.cache_scope) +# def orca_column(): +# df = get_df(table, columns=cols) +# series = 
df.eval(self.data.expression) +# +# if self.output.missing_values is not None: +# series = series.fillna(self.output.missing_values) +# +# if self.output.data_type is not None: +# series = series.astype(self.output.data_type) +# +# return series \ No newline at end of file diff --git a/urbansim_templates/shared/__init__.py b/urbansim_templates/shared/__init__.py index 67e76ac..c2c00b1 100644 --- a/urbansim_templates/shared/__init__.py +++ b/urbansim_templates/shared/__init__.py @@ -1,2 +1,2 @@ from .core import CoreTemplateSettings -from .output_column import OutputColumnSettings +from .output_column import OutputColumnSettings, register_column diff --git a/urbansim_templates/shared/output_column.py b/urbansim_templates/shared/output_column.py index d8d5759..7198438 100644 --- a/urbansim_templates/shared/output_column.py +++ b/urbansim_templates/shared/output_column.py @@ -1,3 +1,5 @@ +import orca + from urbansim_templates import __version__ @@ -91,3 +93,36 @@ def to_dict(self): 'cache_scope': self.cache_scope, 'modelmanager_version': self.modelmanager_version} + +###################################### +###################################### + + +def register_column(build_column, settings): + """ + Register a callable as an Orca column. + + Parameters + ---------- + build_column : callable + Callable should return a ``pd.Series``. 
+ + settings : OutputColumnSettings + + """ + @orca.column(table_name = settings.table, + column_name = settings.column_name, + cache = settings.cache, + cache_scope = settings.cache_scope) + + def orca_column(): + series = build_column() + + if settings.missing_values is not None: + series = series.fillna(settings.missing_values) + + if settings.data_type is not None: + series = series.astype(settings.data_type) + + return series + diff --git a/urbansim_templates/utils.py b/urbansim_templates/utils.py index 73dc6d7..7879ffb 100644 --- a/urbansim_templates/utils.py +++ b/urbansim_templates/utils.py @@ -1,5 +1,6 @@ from __future__ import print_function +import re from datetime import datetime as dt import pandas as pd @@ -346,6 +347,27 @@ def all_cols(table): return list(table.index.names) + list(table.columns) +def cols_in_expression(expression): + """ + Extract all possible column names from a ``df.eval()``-style expression. + + This is achieved using regex to identify tokens in the expression that begin with a + letter and contain any number of alphanumerics or underscores, but do not end with an + opening parenthesis. This excludes function names, but would not exclude constants + (e.g. "pi"), which are semantically indistinguishable from column names. + + Parameters + ---------- + expression : str + + Returns + ------- + cols : list of str + + """ + return re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', expression) + + def trim_cols(df, columns=None): """ Limit a DataFrame to columns that appear in a list of names. List may contain
List may contain From 1a666f28ca11ae494c323d5e9812b0be08b0f023 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Wed, 3 Apr 2019 21:08:31 -0700 Subject: [PATCH 09/12] Remaining tests --- tests/test_column_expression.py | 88 +++++++------------ tests/test_shared_core.py | 2 +- tests/test_shared_output_column.py | 77 +++++++++++++++- .../data/column_from_expression.py | 27 ------ 4 files changed, 108 insertions(+), 86 deletions(-) diff --git a/tests/test_column_expression.py b/tests/test_column_expression.py index 394c7c9..dc288c4 100644 --- a/tests/test_column_expression.py +++ b/tests/test_column_expression.py @@ -11,18 +11,44 @@ def test_expression_settings_persistence(): """ - Confirm ExpressionSettings properties persist through to_dict() and from_dict(). + Confirm ExpressionSettings properties persist through the constructor, to_dict(), + and from_dict(). """ - obj = ExpressionSettings() - obj.table = 'table' - obj.expression = 'expression' + d = {'table': 'tab', 'expression': 'a + b + c'} + obj = ExpressionSettings(table = 'tab', expression = 'a + b + c') - d = obj.to_dict() - print(d) + assert(d == obj.to_dict() == ExpressionSettings.from_dict(d).to_dict()) + + +def test_legacy_data_loader(orca_session): + """ + Check that loading a saved dict with the legacy format works. 
+ + """ + d = { + 'name': 'n', + 'tags': ['a', 'b'], + 'autorun': False, + 'column_name': 'col', + 'table': 'tab', + 'expression': 'abc', + 'data_type': 'int', + 'missing_values': 5, + 'cache': True, + 'cache_scope': 'step'} - obj2 = ExpressionSettings.from_dict(d) - assert(obj2.to_dict() == d) + c = ColumnFromExpression.from_dict(d) + assert(c.meta.name == d['name']) + assert(c.meta.tags == d['tags']) + assert(c.meta.autorun == d['autorun']) + assert(c.data.table == d['table']) + assert(c.data.expression == d['expression']) + assert(c.output.column_name == d['column_name']) + assert(c.output.data_type == d['data_type']) + assert(c.output.missing_values == d['missing_values']) + assert(c.output.cache == d['cache']) + assert(c.output.cache_scope == d['cache_scope']) @pytest.fixture @@ -122,52 +148,6 @@ def test_expression(orca_session): assert(val1.equals(val2)) -def test_data_type(orca_session): - """ - Check that casting data type works. - - """ - orca.add_table('tab', pd.DataFrame({'a': [0.1, 1.33, 2.4]})) - - c = ColumnFromExpression() - c.data.table = 'tab' - c.data.expression = 'a' - c.output.column_name = 'b' - c.run() - - v1 = orca.get_table('tab').get_column('b').values - np.testing.assert_equal(v1, [0.1, 1.33, 2.4]) - - c.output.data_type = 'int' - c.run() - - v1 = orca.get_table('tab').get_column('b').values - np.testing.assert_equal(v1, [0, 1, 2]) - - -def test_missing_values(orca_session): - """ - Check that filling in missing values works. 
- - """ - orca.add_table('tab', pd.DataFrame({'a': [0.1, np.nan, 2.4]})) - - c = ColumnFromExpression() - c.data.table = 'tab' - c.data.expression = 'a' - c.output.column_name = 'b' - c.run() - - v1 = orca.get_table('tab').get_column('b').values - np.testing.assert_equal(v1, [0.1, np.nan, 2.4]) - - c.output.missing_values = 5 - c.run() - - v1 = orca.get_table('tab').get_column('b').values - np.testing.assert_equal(v1, [0.1, 5.0, 2.4]) - - def test_modelmanager_registration(orca_session): """ Check that modelmanager registration and auto-run work as expected. diff --git a/tests/test_shared_core.py b/tests/test_shared_core.py index 855e762..d0018b3 100644 --- a/tests/test_shared_core.py +++ b/tests/test_shared_core.py @@ -7,7 +7,7 @@ def test_property_persistence(): """ - Confirm properties persist through to_dict() and from_dict(). + Confirm CoreTemplateSettings properties persist through to_dict() and from_dict(). """ obj = CoreTemplateSettings() diff --git a/tests/test_shared_output_column.py b/tests/test_shared_output_column.py index c1ef94d..f2627f9 100644 --- a/tests/test_shared_output_column.py +++ b/tests/test_shared_output_column.py @@ -1,16 +1,20 @@ from __future__ import print_function +import numpy as np +import pandas as pd import pytest -from urbansim_templates.shared import CoreTemplateSettings +import orca + +from urbansim_templates.shared import OutputColumnSettings, register_column def test_property_persistence(): """ - Confirm properties persist through to_dict() and from_dict(). + Confirm OutputColumnSettings properties persist through to_dict() and from_dict(). """ - obj = CoreTemplateSettings() + obj = OutputColumnSettings() obj.column_name = 'column' obj.table = 'table' obj.data_type = 'int32' @@ -21,6 +25,71 @@ def test_property_persistence(): d = obj.to_dict() print(d) - obj2 = CoreTemplateSettings.from_dict(d) + obj2 = OutputColumnSettings.from_dict(d) assert(obj2.to_dict() == d) + +# Tests for register_column().. 
+ +@pytest.fixture +def orca_session(): + """ + Set up a clean Orca session, with a data table. + + """ + orca.clear_all() + + df = pd.DataFrame({'a': [0.1, 1.33, 2.4]}, index=[1,2,3]) + orca.add_table('tab', df) + + +def test_column_registration(orca_session): + """ + Confirm column registration works. + + """ + series = pd.Series([4,5,6], index=[1,2,3]) + + def build_column(): + return series + + settings = OutputColumnSettings(column_name='col', table='tab') + register_column(build_column, settings) + + assert(orca.get_table('tab').get_column('col').equals(series)) + + +def test_filling_missing_values(orca_session): + """ + Confirm that filling missing values works. + + """ + series1 = pd.Series([4.0, np.nan, 6.0], index=[1,2,3]) + series2 = pd.Series([4.0, 5.0, 6.0], index=[1,2,3]) + + def build_column(): + return series1 + + settings = OutputColumnSettings(column_name='col', table='tab', missing_values=5) + register_column(build_column, settings) + + assert(orca.get_table('tab').get_column('col').equals(series2)) + + +def test_casting_data_type(orca_session): + """ + Confirm that casting data type works. + + """ + series1 = pd.Series([4.0, 5.0, 6.0], index=[1,2,3]) + series2 = pd.Series([4, 5, 6], index=[1,2,3]) + + def build_column(): + return series1 + + settings = OutputColumnSettings(column_name='col', table='tab', data_type='int') + register_column(build_column, settings) + + assert(orca.get_table('tab').get_column('col').equals(series2)) + + diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index 9cd547c..437b3eb 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -116,9 +116,6 @@ def run(self): ``output.column_name``.
""" - -# table = self.data.table if self.output.table is None else self.output.table - if self.data.table is None: raise ValueError("Please provide a table") @@ -133,14 +130,6 @@ def run(self): if settings.table is None: settings.table = self.data.table - # Some column names in the expression may not be part of the core DataFrame, so - # we'll need to request them from Orca explicitly. This regex pulls out column - # names into a list, by identifying tokens in the expression that begin with a - # letter and contain any number of alphanumerics or underscores, but do not end - # with an opening parenthesis. This will also pick up constants, like "pi", but - # invalid column names will be ignored when we request them from get_df(). -# cols = re.findall('[a-zA-Z_][a-zA-Z0-9_]*(?!\()', self.data.expression) - cols = utils.cols_in_expression(self.data.expression) def build_column(): @@ -149,21 +138,5 @@ def build_column(): return series shared.register_column(build_column, settings) - -# @orca.column(table_name = table, -# column_name = self.output.column_name, -# cache = self.output.cache, -# cache_scope = self.output.cache_scope) -# def orca_column(): -# df = get_df(table, columns=cols) -# series = df.eval(self.data.expression) -# -# if self.output.missing_values is not None: -# series = series.fillna(self.output.missing_values) -# -# if self.output.data_type is not None: -# series = series.astype(self.output.data_type) -# -# return series \ No newline at end of file From daa4c03bb6aceb6c7f3d008b1e2048123ff8b56a Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Thu, 4 Apr 2019 10:31:14 -0700 Subject: [PATCH 10/12] Versioning and changelog --- CHANGELOG.md | 14 ++++++++++---- docs/source/index.rst | 2 +- setup.py | 2 +- urbansim_templates/__init__.py | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c196666..0fd22e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,15 @@ ## 0.2 (not yet released) +#### 0.2.dev6 
(2019-04-04) + +- introduces classes for storing common settings: `shared.CoreTemplateSettings`, `shared.OutputColumnSettings` +- adds new shared functions: `shared.register_column()`, `utils.cols_in_expression()` +- modifies `ColumnFromExpression` template to divide its parameters into three groups + #### 0.2.dev5 (2019-03-29) -- adds new template: `urbansim_templates.data.ColumnFromExpression` +- adds new template: `data.ColumnFromExpression` #### 0.2.dev4 (2019-03-26) @@ -20,8 +26,8 @@ #### 0.2.dev2 (2019-03-04) -- adds template for saving data: `urbansim_templates.data.SaveTable()` -- renames `TableFromDisk()` to `urbansim_templates.data.LoadTable()` +- adds template for saving data: `data.SaveTable()` +- renames `io.TableFromDisk()` to `data.LoadTable()` #### 0.2.dev1 (2019-02-27) @@ -29,7 +35,7 @@ #### 0.2.dev0 (2019-02-19) -- adds first data i/o template: `urbansim_templates.io.TableFromDisk()` +- adds first data i/o template: `io.TableFromDisk()` - adds support for `autorun` template property diff --git a/docs/source/index.rst b/docs/source/index.rst index 1e5ce05..10451ec 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,7 +10,7 @@ UrbanSim Templates provides building blocks for Orca-based simulation models. It The library contains templates for common types of model steps, plus a tool called ModelManager that runs as an extension to the `Orca `__ task orchestrator. ModelManager can register template-based model steps with the orchestrator, save them to disk, and automatically reload them for future sessions. 
-v0.2.dev5, released March 29, 2019 +v0.2.dev6, released April 4, 2019 Contents diff --git a/setup.py b/setup.py index b77543a..e9e99af 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='urbansim_templates', - version='0.2.dev5', + version='0.2.dev6', description='UrbanSim extension for managing model steps', author='UrbanSim Inc.', author_email='info@urbansim.com', diff --git a/urbansim_templates/__init__.py b/urbansim_templates/__init__.py index aba1d24..8157a0e 100644 --- a/urbansim_templates/__init__.py +++ b/urbansim_templates/__init__.py @@ -1 +1 @@ -version = __version__ = '0.2.dev5' +version = __version__ = '0.2.dev6' From 07d6f70e31496c581e79825c86eacc2bcb741652 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Thu, 4 Apr 2019 14:28:39 -0700 Subject: [PATCH 11/12] Documentation updates --- docs/source/conf.py | 1 + docs/source/data-templates.rst | 45 +++++++---- docs/source/utilities.rst | 81 ++++++++++++++++--- .../data/column_from_expression.py | 28 ++++--- urbansim_templates/data/load_table.py | 5 +- urbansim_templates/data/save_table.py | 5 +- urbansim_templates/shared/core.py | 5 -- urbansim_templates/shared/output_column.py | 4 +- 8 files changed, 124 insertions(+), 50 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 9d7541f..1faf741 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -37,6 +37,7 @@ # ones. extensions = [ 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode'] diff --git a/docs/source/data-templates.rst b/docs/source/data-templates.rst index 9317a3e..ab8b676 100644 --- a/docs/source/data-templates.rst +++ b/docs/source/data-templates.rst @@ -1,13 +1,10 @@ -Data template APIs -================== +Data management templates +========================= Usage ----- -Data templates help you load tables into `Orca `__ or save tables or subsets of tables to disk. 
- -Example -~~~~~~~ +Data templates help you load tables into `Orca `__, create columns of derived data, or save tables or subsets of tables to disk. .. code-block:: python @@ -75,22 +72,42 @@ From Orca's perspective, tables set up using the :mod:`~urbansim_templates.data. Unlike the templates, Orca relies on user-specified "`broadcast `__" relationships to perform automatic merging of tables. :mod:`~urbansim_templates.data.LoadTable` does not register any broadcasts, because they're not needed if tables follow the schema rules above. So if you use these tables in non-template model steps, you may need to add broadcasts separately. -LoadTable() ------------ +Data loading API +---------------- + +.. currentmodule:: urbansim_templates.data + +.. autosummary:: + LoadTable .. autoclass:: urbansim_templates.data.LoadTable :members: -SaveTable() ------------ +Column creation API +------------------- -.. autoclass:: urbansim_templates.data.SaveTable +.. currentmodule:: urbansim_templates.data + +.. autosummary:: + ColumnFromExpression + ExpressionSettings + +.. autoclass:: urbansim_templates.data.ColumnFromExpression :members: +.. autoclass:: urbansim_templates.data.ExpressionSettings + :members: -ColumnFromExpression() ----------------------- +Data output API +--------------- -.. autoclass:: urbansim_templates.data.ColumnFromExpression +.. currentmodule:: urbansim_templates.data + +.. autosummary:: + SaveTable + +.. autoclass:: urbansim_templates.data.SaveTable :members: + + diff --git a/docs/source/utilities.rst b/docs/source/utilities.rst index 0e3f76e..a86d392 100644 --- a/docs/source/utilities.rst +++ b/docs/source/utilities.rst @@ -1,32 +1,87 @@ -Utilities API -============= +Shared utilities +================ The utilities are mainly helper functions for templates. -Template validation -------------------- +General template tools API +-------------------------- -.. automodule:: urbansim_templates.utils - :members: validate_template +.. 
currentmodule:: urbansim_templates.shared + +.. autosummary:: + CoreTemplateSettings + +.. automodule:: urbansim_templates.shared + :members: CoreTemplateSettings + + +Column output tools API +----------------------- +.. currentmodule:: urbansim_templates.shared -Table schemas and merging -------------------------- +.. autosummary:: + OutputColumnSettings + register_column + +.. automodule:: urbansim_templates.shared + :members: OutputColumnSettings, register_column + + +Table schemas and merging API +----------------------------- + +.. currentmodule:: urbansim_templates.utils + +.. autosummary:: + validate_table + validate_all_tables + merge_tables .. automodule:: urbansim_templates.utils :members: validate_table, validate_all_tables, merge_tables -Other helper functions ----------------------- +Other helper functions API +-------------------------- + +.. currentmodule:: urbansim_templates.utils + +.. autosummary:: + all_cols + cols_in_expression + get_data + get_df + trim_cols + to_list + update_column + update_name .. automodule:: urbansim_templates.utils - :members: all_cols, get_data, get_df, trim_cols, update_column, to_list, update_column, update_name + :members: all_cols, cols_in_expression, get_data, get_df, trim_cols, to_list, update_column, update_name + + +Spec validation API +------------------- + +.. currentmodule:: urbansim_templates.utils + +.. autosummary:: + validate_template + +.. automodule:: urbansim_templates.utils + :members: validate_template + + +Version management API +---------------------- +.. currentmodule:: urbansim_templates.utils -Version management ------------------- +.. autosummary:: + parse_version + version_greater_or_equal .. 
automodule:: urbansim_templates.utils :members: parse_version, version_greater_or_equal diff --git a/urbansim_templates/data/column_from_expression.py b/urbansim_templates/data/column_from_expression.py index 437b3eb..bf2ae34 100644 --- a/urbansim_templates/data/column_from_expression.py +++ b/urbansim_templates/data/column_from_expression.py @@ -7,7 +7,8 @@ class ExpressionSettings(): """ - Stores custom parameters used by the ColumnFromExpression template. Parameters can be + Stores custom parameters used by the + :mod:`~urbansim_templates.data.ColumnFromExpression` template. Parameters can be passed to the constructor or set as attributes. Parameters @@ -38,10 +39,11 @@ def to_dict(self): @modelmanager.template class ColumnFromExpression(): """ - Template to register a column of derived data with Orca, based on an expression. The - expression can refer to any columns in the same table, and will be evaluated using - ``df.eval()``. Values will be calculated lazily, only when the column is needed for - a specific operation. + Template to register a column of derived data with Orca, based on an expression. + Parameters may be passed to the constructor, but they are easier to set as + attributes. The expression can refer to any columns in the same table, and will be + evaluated using ``df.eval()``. Values will be calculated lazily, only when the column + is needed for a specific operation. Parameters ---------- @@ -69,7 +71,10 @@ def __init__(self, meta=None, data=None, output=None): @classmethod def from_dict(cls, d): + """ + Create a class instance from a saved dictionary. + """ if 'meta' not in d: return cls.from_dict_0_2_dev5(d) @@ -82,7 +87,8 @@ def from_dict(cls, d): @classmethod def from_dict_0_2_dev5(cls, d): """ - Converter to read saved data from 0.2.dev5 or earlier. + Converter to read saved data from 0.2.dev5 or earlier. Automatically invoked by + ``from_dict()`` as needed. 
""" return cls( @@ -102,6 +108,10 @@ def from_dict_0_2_dev5(cls, d): def to_dict(self): + """ + Create a dictionary representation of the object. + + """ return { 'meta': self.meta.to_dict(), 'data': self.data.to_dict(), @@ -110,10 +120,8 @@ def to_dict(self): def run(self): """ - Run the template, registering a column of derived data with Orca. - - Requires values to be set for ``data.table``, ``data.expression``, and - ``output.column_name``. + Run the template, registering a column of derived data with Orca. Requires values + to be set for ``data.table``, ``data.expression``, and ``output.column_name``. """ if self.data.table is None: diff --git a/urbansim_templates/data/load_table.py b/urbansim_templates/data/load_table.py index 344ecf6..3aa6f55 100644 --- a/urbansim_templates/data/load_table.py +++ b/urbansim_templates/data/load_table.py @@ -16,13 +16,12 @@ @modelmanager.template class LoadTable(): """ - Class for registering data tables from local CSV or HDF files. + Template for registering data tables from local CSV or HDF files. Parameters can be + passed to the constructor or set as attributes. An instance of this template class stores *instructions for loading a data table*, packaged into an Orca step. Running the instructions registers the table with Orca. - All the parameters can also be set as properties after creating the class instance. - Parameters ---------- table : str, optional diff --git a/urbansim_templates/data/save_table.py b/urbansim_templates/data/save_table.py index c05813f..052f928 100644 --- a/urbansim_templates/data/save_table.py +++ b/urbansim_templates/data/save_table.py @@ -12,9 +12,8 @@ @modelmanager.template class SaveTable(): """ - Class for saving Orca tables to local CSV or HDF5 files. - - All the parameters can also be set as properties after creating the class instance. + Template for saving Orca tables to local CSV or HDF5 files. Parameters can be passed + to the constructor or set as attributes. 
Parameters ---------- diff --git a/urbansim_templates/shared/core.py b/urbansim_templates/shared/core.py index 02f099b..e5ba71b 100644 --- a/urbansim_templates/shared/core.py +++ b/urbansim_templates/shared/core.py @@ -28,11 +28,6 @@ class CoreTemplateSettings(): template_version : str Version of the template class package. - Attributes - ---------- - modelmanager_version : str - Version of the ModelManager package that created the CoreTemplateSettings. - """ def __init__(self, name = None, diff --git a/urbansim_templates/shared/output_column.py b/urbansim_templates/shared/output_column.py index 7198438..e66566b 100644 --- a/urbansim_templates/shared/output_column.py +++ b/urbansim_templates/shared/output_column.py @@ -5,8 +5,8 @@ class OutputColumnSettings(): """ - Stores standard parameters and logic used by templates that generate or modify - columns. Parameters can be passed to the constructor or set as attributes. + Stores standard parameters used by templates that generate or modify columns. + Parameters can be passed to the constructor or set as attributes. Parameters ---------- From 7744ad4f2af3758ccb189125fd728b1364a2b8cd Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Thu, 4 Apr 2019 14:33:12 -0700 Subject: [PATCH 12/12] Updating travis script --- .travis.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 99709b9..e294b14 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,12 +10,8 @@ matrix: - python: '3.7' # temp solution until python 3.7 is more cleanly supported dist: xenial sudo: true - allow_failures: - - python: '3.7' # dependencies are blocking installation - fast_finish: true install: - - pip install git+git://github.com/udst/choicemodels.git - pip install . - pip install -r requirements-extras.txt - pip install -r requirements-dev.txt