Skip to content

Commit

Permalink
Added CSV parsing to rules engine (codeforboston#159)
Browse files Browse the repository at this point in the history
* Added CSV parsing to rules engine

Co-authored-by: dwindleduck <[email protected]>
Co-authored-by: Jonathan Kwan <[email protected]>
Co-authored-by: AdamFinkle <[email protected]>
Co-authored-by: eriksynn <[email protected]>

* fixed lints

---------

Co-authored-by: dwindleduck <[email protected]>
Co-authored-by: Jonathan Kwan <[email protected]>
Co-authored-by: AdamFinkle <[email protected]>
Co-authored-by: eriksynn <[email protected]>
  • Loading branch information
5 people authored Apr 10, 2024
1 parent 7ee859f commit 793cacc
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 0 deletions.
123 changes: 123 additions & 0 deletions rules-engine/src/rules_engine/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""
Return lists of gas billing data parsed from Eversource and
National Grid CSVs.
"""
import csv
import io
from datetime import datetime, timedelta

from .pydantic_models import NaturalGasBillingInput, NaturalGasBillingRecordInput


class _GasBillRowEversource:
    """
    One data row of an Eversource gas bill CSV.

    Attribute names mirror the column titles found in the first row of the
    Eversource bill. Values are stored exactly as they were read; no type
    conversion happens here.

    Example:
        Read Date,Usage,Number of Days,Usage per day,Charge,Average Temperature
        1/18/2022,184.00,32,5.75,$327.58,30.0
        ...
    """

    def __init__(self, row):
        # Copy across only the columns the parser uses; every other column
        # in the row is ignored.
        for attribute, column in (
            ("read_date", "Read Date"),
            ("usage", "Usage"),
            ("number_of_days", "Number of Days"),
        ):
            setattr(self, attribute, row[column])


class _GasBillRowNationalGrid:
    """
    One data row of a National Grid gas bill CSV.

    Attribute names mirror the column titles in the header row that sits
    directly above the billing data. Values are stored exactly as they were
    read; no type conversion happens here.

    Example:
        Name,FIRST LAST,,,,,
        Address,"100 PLACE AVE, BOSTON MA 02130",,,,,
        Account Number,1111111111,,,,,
        Service,Service 1,,,,,
        ,,,,,,
        TYPE,START DATE,END DATE,USAGE,UNITS,COST,NOTES
        Natural gas billing,12/29/2012,1/24/2013,149,therms,$206.91 ,
        ...
    """

    def __init__(self, row):
        # Copy across only the columns the parser uses; every other column
        # in the row is ignored.
        for attribute, column in (
            ("start_date", "START DATE"),
            ("end_date", "END DATE"),
            ("usage", "USAGE"),
        ):
            setattr(self, attribute, row[column])


def parse_gas_bill_eversource(data: str) -> NaturalGasBillingInput:
    """
    Parse the text of an Eversource gas bill CSV into billing records.

    The first row of the CSV must be the column header. Each following
    non-blank row becomes one NaturalGasBillingRecordInput, in file order.

    Example:
        Read Date,Usage,Number of Days,Usage per day,Charge,Average Temperature
        1/18/2022,184.00,32,5.75,$327.58,30.0
        ...

    Args:
        data: Full contents of the CSV as a string. A leading byte-order
            mark is ignored, and CR, LF or CRLF line endings are accepted.

    Returns:
        A NaturalGasBillingInput holding one record per non-blank data row.

    Raises:
        KeyError: If a required column is missing from the header.
        ValueError: If a read date is not in month/day/year form or the
            number of days is not an integer.
    """
    # A byte-order mark left at the front of the text would otherwise become
    # part of the first column name and make the "Read Date" lookup fail.
    text = data.lstrip("\ufeff")
    # newline="" makes the buffer split lines on CR, LF or CRLF without
    # translating them, which is the input form the csv module asks for.
    # The default StringIO only splits on LF, so a CR-delimited file would
    # reach the csv parser as a single malformed line.
    reader = csv.DictReader(io.StringIO(text, newline=""))

    records = []
    for row in reader:
        # Rows whose cells are all empty (e.g. ",,,,,") carry no bill data.
        if not any(row.values()):
            continue

        parsed_row = _GasBillRowEversource(row)
        period_end_date = datetime.strptime(parsed_row.read_date, "%m/%d/%Y").date()
        # The read date is itself one of the billed days, so the first day of
        # the period is (number_of_days - 1) days earlier, not number_of_days.
        period_start_date = period_end_date - timedelta(
            days=int(parsed_row.number_of_days) - 1
        )

        records.append(
            NaturalGasBillingRecordInput(
                period_start_date=period_start_date,
                period_end_date=period_end_date,
                usage_therms=parsed_row.usage,
                inclusion_override=None,
            )
        )

    return NaturalGasBillingInput(records=records)


def parse_gas_bill_national_grid(data: str) -> NaturalGasBillingInput:
    """
    Parse the text of a National Grid gas bill CSV into billing records.

    The file starts with account details, followed by a header row and then
    the billing rows. The header row is located by its content (the first
    row that contains the START DATE, END DATE and USAGE columns) rather
    than by a fixed line count, so a longer or shorter preamble, or a quoted
    field that spans lines, does not shift the parse.

    Example:
        Name,FIRST LAST,,,,,
        Address,"100 PLACE AVE, BOSTON MA 02130",,,,,
        Account Number,1111111111,,,,,
        Service,Service 1,,,,,
        ,,,,,,
        TYPE,START DATE,END DATE,USAGE,UNITS,COST,NOTES
        Natural gas billing,12/29/2012,1/24/2013,149,therms,$206.91 ,
        ...

    Args:
        data: Full contents of the CSV as a string. CR, LF or CRLF line
            endings are accepted.

    Returns:
        A NaturalGasBillingInput holding one record per non-blank row that
        follows the header.

    Raises:
        ValueError: If no header row with the required columns is found, or
            if a start/end date is not in month/day/year form.
        KeyError: If a billing row is too short to reach a required column.
    """
    required_columns = {"START DATE", "END DATE", "USAGE"}
    # newline="" makes the buffer split lines on CR, LF or CRLF without
    # translating them, which is the input form the csv module asks for.
    reader = csv.reader(io.StringIO(data, newline=""))

    # Consume the account-detail preamble up to and including the header row.
    header = None
    for cells in reader:
        candidate = [cell.strip() for cell in cells]
        if required_columns.issubset(candidate):
            header = candidate
            break
    if header is None:
        raise ValueError(
            "National Grid CSV has no header row containing "
            "START DATE, END DATE and USAGE columns"
        )

    records = []
    # The same reader continues from the row right after the header.
    for cells in reader:
        # Blank lines and separator rows such as ",,,,,," carry no bill data.
        if not any(cells):
            continue

        parsed_row = _GasBillRowNationalGrid(dict(zip(header, cells)))
        period_start_date = datetime.strptime(parsed_row.start_date, "%m/%d/%Y").date()
        period_end_date = datetime.strptime(parsed_row.end_date, "%m/%d/%Y").date()

        records.append(
            NaturalGasBillingRecordInput(
                period_start_date=period_start_date,
                period_end_date=period_end_date,
                usage_therms=parsed_row.usage,
                inclusion_override=None,
            )
        )

    return NaturalGasBillingInput(records=records)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Read Date,Usage,Number of Days,Usage per day,Charge,Average Temperature1/18/2022,184.00,32,5.75,$327.58,30.012/17/2021,124.00,29,4.28,$224.09,39.011/18/2021,89.00,30,2.97,$148.99,48.010/19/2021,28.00,32,0.88,$45.72,60.09/17/2021,13.00,31,0.42,$25.87,70.08/17/2021,14.00,29,0.48,$26.72,71.07/19/2021,11.00,32,0.34,$22.77,67.06/17/2021,23.00,30,0.77,$36.70,66.05/18/2021,68.00,32,2.13,$100.55,53.04/16/2021,96.00,30,3.20,$158.93,48.03/17/2021,164.00,27,6.07,$264.40,33.02/18/2021,229.00,29,7.90,$365.24,26.01/20/2021,189.00,30,6.30,$302.95,34.012/21/2020,186.00,33,5.64,$290.52,37.011/18/2020,91.00,29,3.14,$115.65,49.010/20/2020,43.00,33,1.30,$42.57,57.09/17/2020,15.00,30,0.50,$21.69,65.08/18/2020,16.00,32,0.50,$22.82,76.07/17/2020,16.00,29,0.55,$23.35,72.06/18/2020,25.00,30,0.83,$32.25,65.05/19/2020,93.00,32,2.91,$111.88,49.04/17/2020,118.00,29,4.07,$167.67,43.03/19/2020,146.00,29,5.03,$205.44,40.02/19/2020,177.00,29,6.10,$247.26,33.01/21/2020,234.00,35,6.69,$325.07,33.012/17/2019,161.00,27,5.96,$227.29,35.011/20/2019,115.00,29,3.97,$144.36,42.010/22/2019,43.00,35,1.23,$42.33,56.09/17/2019,13.00,29,0.45,$19.10,65.08/19/2019,14.00,32,0.44,$20.46,73.07/18/2019,13.00,30,0.43,$20.13,73.06/18/2019,30.00,32,0.94,$37.15,63.05/17/2019,67.00,28,2.39,$78.94,52.04/19/2019,115.00,30,3.83,$146.53,45.03/20/2019,191.00,29,6.59,$237.74,31.02/19/2019,200.00,28,7.14,$249.70,29.0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Name,FIRST LAST,,,,,
Address,"100 STREET AVE, BOSTON MA 02130",,,,,
Account Number,1111111111,,,,,
Service,Service 1,,,,,
,,,,,,
TYPE,START DATE,END DATE,USAGE,UNITS,COST,NOTES
Natural gas billing,10/2/2020,11/4/2020,29,therms,$42.08 ,
Natural gas billing,11/5/2020,12/3/2020,36,therms,$65.60 ,
Natural gas billing,12/4/2020,1/7/2021,97,therms,$159.49 ,
Natural gas billing,1/8/2021,2/5/2021,105,therms,$169.09 ,
Natural gas billing,2/6/2021,3/5/2021,98,therms,$158.19 ,
Natural gas billing,3/6/2021,4/6/2021,66,therms,$111.79 ,
Natural gas billing,4/7/2021,5/5/2021,22,therms,$43.16 ,
Natural gas billing,5/6/2021,6/7/2021,19,therms,$32.42 ,
Natural gas billing,6/8/2021,7/6/2021,7,therms,$18.68 ,
Natural gas billing,7/7/2021,8/4/2021,10,therms,$21.73 ,
Natural gas billing,8/5/2021,9/8/2021,11,therms,$25.35 ,
Natural gas billing,9/9/2021,10/5/2021,8,therms,$19.58 ,
Natural gas billing,10/6/2021,11/3/2021,13,therms,$27.10 ,
Natural gas billing,11/4/2021,12/6/2021,41,therms,$87.45 ,
Natural gas billing,12/7/2021,1/5/2022,86,therms,$171.92 ,
Natural gas billing,1/6/2022,2/3/2022,132,therms,$248.63 ,
Natural gas billing,2/4/2022,3/7/2022,116,therms,$226.66 ,
Natural gas billing,3/8/2022,4/4/2022,49,therms,$109.44 ,
Natural gas billing,4/5/2022,5/5/2022,39,therms,$87.54 ,
Natural gas billing,5/6/2022,6/6/2022,20,therms,$44.30 ,
Natural gas billing,6/7/2022,7/5/2022,9,therms,$27.71 ,
Natural gas billing,7/6/2022,8/3/2022,7,therms,$23.86 ,
Natural gas billing,8/4/2022,9/3/2022,8,therms,$24.04 ,
Natural gas billing,9/4/2022,10/3/2022,8,therms,$26.41 ,
Natural gas billing,10/4/2022,11/3/2022,19,therms,$48.92 ,
49 changes: 49 additions & 0 deletions rules-engine/tests/test_rules_engine/test_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import pathlib
from datetime import date

from rules_engine import parser
from rules_engine.pydantic_models import NaturalGasBillingRecordInput

# Directory of example fixtures, resolved relative to this test module.
ROOT_DIR = pathlib.Path(__file__).parent.joinpath("cases", "examples")


def test_parse_gas_bill_eversource():
    """Parse the Eversource example bill and spot-check its second record."""
    with open(ROOT_DIR / "feldman" / "natural-gas-eversource.csv") as f:
        s = f.read()

    result = parser.parse_gas_bill_eversource(s)

    assert len(result.records) == 36
    for row in result.records:
        assert isinstance(row, NaturalGasBillingRecordInput)

    # input: 12/17/2021,124.00,29,4.28,$224.09,39.0
    # from excel: 11/19/2021,12/17/2021,29,124,,1,4.28,3.82

    second_row = result.records[1]
    assert second_row.period_start_date == date(2021, 11, 19)
    assert second_row.period_end_date == date(2021, 12, 17)
    assert isinstance(second_row.usage_therms, float)
    assert second_row.usage_therms == 124
    # None is a singleton, so it is compared by identity rather than equality.
    assert second_row.inclusion_override is None


def test_parse_gas_bill_national_grid():
    """Parse the National Grid example bill and spot-check its second record."""
    with open(ROOT_DIR / "quateman" / "natural-gas-national-grid.csv") as f:
        s = f.read()

    result = parser.parse_gas_bill_national_grid(s)

    assert len(result.records) == 25
    for row in result.records:
        assert isinstance(row, NaturalGasBillingRecordInput)

    # input: Natural gas billing,11/5/2020,12/3/2020,36,therms,$65.60 ,
    # from excel: 11/6/2020,12/3/2020,28,36,,1,1.29,0.99
    # NOTE(review): the spreadsheet line above starts on 11/6 while the CSV's
    # START DATE, which is what is asserted below, is 11/5 -- confirm which
    # convention downstream code expects.

    second_row = result.records[1]
    assert second_row.period_start_date == date(2020, 11, 5)
    assert second_row.period_end_date == date(2020, 12, 3)
    assert isinstance(second_row.usage_therms, float)
    assert second_row.usage_therms == 36
    # None is a singleton, so it is compared by identity rather than equality.
    assert second_row.inclusion_override is None

0 comments on commit 793cacc

Please sign in to comment.