"""
Return gas billing data parsed from Eversource and National Grid CSVs.
"""
import csv
import io
from datetime import date, datetime, timedelta

from .pydantic_models import NaturalGasBillingInput, NaturalGasBillingRecordInput

# Both utilities write dates as month/day/year without zero padding,
# e.g. "1/18/2022".
_DATE_FORMAT = "%m/%d/%Y"

# Column names that identify the header row of a National Grid export.
_NATIONAL_GRID_COLUMNS = frozenset({"START DATE", "END DATE", "USAGE"})


class _GasBillRowEversource:
    """
    Holds data for one row of an Eversource gas bill CSV.

    The names of the fields correspond to the first row of the Eversource bill.
    All values are kept as the raw strings read from the CSV.

    Example:
        Read Date,Usage,Number of Days,Usage per day,Charge,Average Temperature
        1/18/2022,184.00,32,5.75,$327.58,30.0
        ...
    """

    def __init__(self, row):
        self.read_date = row["Read Date"]
        self.usage = row["Usage"]
        self.number_of_days = row["Number of Days"]


class _GasBillRowNationalGrid:
    """
    Holds data for one row of a National Grid gas bill CSV.

    The names of the fields correspond to the row of the National Grid
    bill right before the billing data.
    All values are kept as the raw strings read from the CSV.

    Example:
        Name,FIRST LAST,,,,,
        Address,"100 PLACE AVE, BOSTON MA 02130",,,,,
        Account Number,1111111111,,,,,
        Service,Service 1,,,,,
        ,,,,,,
        TYPE,START DATE,END DATE,USAGE,UNITS,COST,NOTES
        Natural gas billing,12/29/2012,1/24/2013,149,therms,$206.91 ,
        ...
    """

    def __init__(self, row):
        self.start_date = row["START DATE"]
        self.end_date = row["END DATE"]
        self.usage = row["USAGE"]


def _open_csv(data: str) -> io.StringIO:
    """Wrap CSV text in a file-like object suitable for the csv module."""
    # The csv documentation asks for newline="" so that line endings reach
    # the reader untranslated. A leading byte-order mark is dropped because
    # it would otherwise become part of the first column name.
    return io.StringIO(data.lstrip("\ufeff"), newline="")


def _parse_date(text: str) -> date:
    """Parse a month/day/year string such as "1/18/2022" into a date."""
    return datetime.strptime(text.strip(), _DATE_FORMAT).date()


def _make_record(
    period_start_date: date, period_end_date: date, usage
) -> NaturalGasBillingRecordInput:
    """Build one billing record; usage is handed to the model unchanged."""
    return NaturalGasBillingRecordInput(
        period_start_date=period_start_date,
        period_end_date=period_end_date,
        usage_therms=usage,
        inclusion_override=None,
    )


def _national_grid_rows(data: str):
    """
    Yield one dict per billing row of a National Grid export.

    Rows are read until one containing all of the START DATE, END DATE and
    USAGE column names is found; that row is the header and every later
    non-blank row is yielded keyed by it.

    Raises:
        ValueError: if no header row is found.
    """
    reader = csv.reader(_open_csv(data))

    header = None
    for cells in reader:
        names = [cell.strip() for cell in cells]
        if _NATIONAL_GRID_COLUMNS.issubset(names):
            header = names
            break
    if header is None:
        raise ValueError(
            "National Grid CSV has no header row with the columns "
            + ", ".join(sorted(_NATIONAL_GRID_COLUMNS))
        )

    for cells in reader:
        # Skip blank lines and separator rows such as ",,,,,,".
        if not any(cell.strip() for cell in cells):
            continue
        yield dict(zip(header, cells))


def parse_gas_bill_eversource(data: str) -> NaturalGasBillingInput:
    """
    Return the gas billing records parsed from an Eversource CSV
    received as a string.

    Args:
        data: the full text of the CSV, header row first.

    Returns:
        A NaturalGasBillingInput holding one record per data row, in the
        order the rows appear in the CSV.

    Raises:
        KeyError: if an expected column is missing.
        ValueError: if a date or day count cannot be parsed.

    Example:
        Read Date,Usage,Number of Days,Usage per day,Charge,Average Temperature
        1/18/2022,184.00,32,5.75,$327.58,30.0
        ...
    """
    records = []
    for row in csv.DictReader(_open_csv(data)):
        parsed_row = _GasBillRowEversource(row)
        period_end_date = _parse_date(parsed_row.read_date)
        # The read date is the last day of the period and is itself one of
        # the billed days, so the first day is (number of days - 1) earlier.
        period_start_date = period_end_date - timedelta(
            days=int(parsed_row.number_of_days) - 1
        )
        records.append(
            _make_record(period_start_date, period_end_date, parsed_row.usage)
        )

    return NaturalGasBillingInput(records=records)


def parse_gas_bill_national_grid(data: str) -> NaturalGasBillingInput:
    """
    Return the gas billing records parsed from a National Grid CSV
    received as a string.

    The account preamble that precedes the billing table is skipped by
    searching for the header row rather than by counting lines, so a
    preamble of a different length is still parsed.

    Args:
        data: the full text of the CSV, including the account preamble.

    Returns:
        A NaturalGasBillingInput holding one record per billing row, in the
        order the rows appear in the CSV.

    Raises:
        ValueError: if the header row is missing or a date cannot be parsed.

    Example:
        Name,FIRST LAST,,,,,
        Address,"100 PLACE AVE, BOSTON MA 02130",,,,,
        Account Number,1111111111,,,,,
        Service,Service 1,,,,,
        ,,,,,,
        TYPE,START DATE,END DATE,USAGE,UNITS,COST,NOTES
        Natural gas billing,12/29/2012,1/24/2013,149,therms,$206.91 ,
        ...
    """
    records = []
    for row in _national_grid_rows(data):
        parsed_row = _GasBillRowNationalGrid(row)
        records.append(
            _make_record(
                _parse_date(parsed_row.start_date),
                _parse_date(parsed_row.end_date),
                parsed_row.usage,
            )
        )

    return NaturalGasBillingInput(records=records)
7/17/2020,16.00,29,0.55,$23.35,72.0 6/18/2020,25.00,30,0.83,$32.25,65.0 5/19/2020,93.00,32,2.91,$111.88,49.0 4/17/2020,118.00,29,4.07,$167.67,43.0 3/19/2020,146.00,29,5.03,$205.44,40.0 2/19/2020,177.00,29,6.10,$247.26,33.0 1/21/2020,234.00,35,6.69,$325.07,33.0 12/17/2019,161.00,27,5.96,$227.29,35.0 11/20/2019,115.00,29,3.97,$144.36,42.0 10/22/2019,43.00,35,1.23,$42.33,56.0 9/17/2019,13.00,29,0.45,$19.10,65.0 8/19/2019,14.00,32,0.44,$20.46,73.0 7/18/2019,13.00,30,0.43,$20.13,73.0 6/18/2019,30.00,32,0.94,$37.15,63.0 5/17/2019,67.00,28,2.39,$78.94,52.0 4/19/2019,115.00,30,3.83,$146.53,45.0 3/20/2019,191.00,29,6.59,$237.74,31.0 2/19/2019,200.00,28,7.14,$249.70,29.0 \ No newline at end of file diff --git a/rules-engine/tests/test_rules_engine/cases/examples/quateman/natural-gas-national-grid.csv b/rules-engine/tests/test_rules_engine/cases/examples/quateman/natural-gas-national-grid.csv new file mode 100644 index 00000000..215bafee --- /dev/null +++ b/rules-engine/tests/test_rules_engine/cases/examples/quateman/natural-gas-national-grid.csv @@ -0,0 +1,31 @@ +Name,FIRST LAST,,,,, +Address,"100 STREET AVE, BOSTON MA 02130",,,,, +Account Number,1111111111,,,,, +Service,Service 1,,,,, +,,,,,, +TYPE,START DATE,END DATE,USAGE,UNITS,COST,NOTES +Natural gas billing,10/2/2020,11/4/2020,29,therms,$42.08 , +Natural gas billing,11/5/2020,12/3/2020,36,therms,$65.60 , +Natural gas billing,12/4/2020,1/7/2021,97,therms,$159.49 , +Natural gas billing,1/8/2021,2/5/2021,105,therms,$169.09 , +Natural gas billing,2/6/2021,3/5/2021,98,therms,$158.19 , +Natural gas billing,3/6/2021,4/6/2021,66,therms,$111.79 , +Natural gas billing,4/7/2021,5/5/2021,22,therms,$43.16 , +Natural gas billing,5/6/2021,6/7/2021,19,therms,$32.42 , +Natural gas billing,6/8/2021,7/6/2021,7,therms,$18.68 , +Natural gas billing,7/7/2021,8/4/2021,10,therms,$21.73 , +Natural gas billing,8/5/2021,9/8/2021,11,therms,$25.35 , +Natural gas billing,9/9/2021,10/5/2021,8,therms,$19.58 , +Natural gas 
import pathlib
from datetime import date

from rules_engine import parser
from rules_engine.pydantic_models import NaturalGasBillingRecordInput

ROOT_DIR = pathlib.Path(__file__).parent / "cases" / "examples"


def _read_example(*parts):
    """Return the text of the example file at ROOT_DIR/<parts...>."""
    # An explicit encoding keeps the test independent of the platform locale.
    return ROOT_DIR.joinpath(*parts).read_text(encoding="utf-8")


def test_parse_gas_bill_eversource():
    """The Eversource example parses into 36 records with the expected dates."""
    s = _read_example("feldman", "natural-gas-eversource.csv")

    result = parser.parse_gas_bill_eversource(s)

    assert len(result.records) == 36
    for row in result.records:
        assert isinstance(row, NaturalGasBillingRecordInput)

    # input: 12/17/2021,124.00,29,4.28,$224.09,39.0
    # from excel: 11/19/2021,12/17/2021,29,124,,1,4.28,3.82

    second_row = result.records[1]
    assert second_row.period_start_date == date(2021, 11, 19)
    assert second_row.period_end_date == date(2021, 12, 17)
    assert isinstance(second_row.usage_therms, float)
    assert second_row.usage_therms == 124
    assert second_row.inclusion_override is None


def test_parse_gas_bill_national_grid():
    """The National Grid example parses into 25 records with the expected dates."""
    s = _read_example("quateman", "natural-gas-national-grid.csv")

    result = parser.parse_gas_bill_national_grid(s)

    assert len(result.records) == 25
    for row in result.records:
        assert isinstance(row, NaturalGasBillingRecordInput)

    # input: Natural gas billing,11/5/2020,12/3/2020,36,therms,$65.60 ,
    # from excel: 11/6/2020,12/3/2020,28,36,,1,1.29,0.99
    # NOTE(review): the spreadsheet start date (11/6) is one day later than
    # the CSV start date asserted below (11/5) -- confirm which is intended.

    second_row = result.records[1]
    assert second_row.period_start_date == date(2020, 11, 5)
    assert second_row.period_end_date == date(2020, 12, 3)
    assert isinstance(second_row.usage_therms, float)
    assert second_row.usage_therms == 36
    assert second_row.inclusion_override is None