diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 00000000..5c7c50a6
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,19 @@
+name: Lint
+
+on:
+ push:
+ pull_request:
+ branches:
+ - development
+ - main
+
+jobs:
+ formatting:
+ if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/setup-python@v4
+ - run: python -m pip install --upgrade ruff
+ - run: ruff check . --diff
+ - run: ruff format . --diff
diff --git a/notebooks/explore_data/compare_data_sources.ipynb b/notebooks/explore_data/compare_data_sources.ipynb
index 886855af..41ee76c2 100644
--- a/notebooks/explore_data/compare_data_sources.ipynb
+++ b/notebooks/explore_data/compare_data_sources.ipynb
@@ -11,7 +11,7 @@
"# Depending on how your jupyter handles working directories, this may not be needed.\n",
"import sys\n",
"\n",
- "sys.path.append(\"../../../open-grid-emissions/\")\n"
+ "sys.path.append(\"../../../open-grid-emissions/\")"
]
},
{
@@ -57,7 +57,7 @@
"metadata": {},
"outputs": [],
"source": [
- "year = 2019\n"
+ "year = 2019"
]
},
{
@@ -142,7 +142,7 @@
" col.replace(\"Net Generation (MW) from \", \"\") for col in eia_930.columns\n",
"]\n",
"\n",
- "eia_930.head(3)\n"
+ "eia_930.head(3)"
]
},
{
@@ -171,7 +171,7 @@
" \"Natural Gas\",\n",
" \"All Petroleum Products\",\n",
" \"Other Fuel Sources\",\n",
- "]\n"
+ "]"
]
},
{
@@ -185,7 +185,7 @@
"\n",
"# replace negative values with NaN\n",
"for col in emitting_gen_columns:\n",
- " eia_930.loc[eia_930[col] < 0, col] = np.nan\n"
+ " eia_930.loc[eia_930[col] < 0, col] = np.nan"
]
},
{
@@ -201,7 +201,7 @@
"# Calculate hourly EIA-930 non-renewable generation\n",
"eia_930[\"total_net_generation\"] = eia_930[net_gen_columns].sum(axis=1)\n",
"\n",
- "eia_930.head(3)\n"
+ "eia_930.head(3)"
]
},
{
@@ -215,7 +215,7 @@
" eia_930.groupby(\"Balancing Authority\").sum()[\"emitting_net_generation\"]\n",
")\n",
"annual_930 = annual_930.rename(columns={\"emitting_net_generation\": \"EIA-930\"})\n",
- "annual_930.head()\n"
+ "annual_930.head()"
]
},
{
@@ -247,7 +247,7 @@
" AND report_date <= '{year}-12-30'\",\n",
" pudl_engine,\n",
")\n",
- "gen_fuel_923.head()\n"
+ "gen_fuel_923.head()"
]
},
{
@@ -264,7 +264,7 @@
"fuel_code_dict_pudl = pd.Series(\n",
" fuel_code_dict_pudl.fuel_type_code_pudl.values,\n",
" index=fuel_code_dict_pudl.energy_source_code,\n",
- ").to_dict()\n"
+ ").to_dict()"
]
},
{
@@ -336,7 +336,7 @@
"# Add ba code to generation_fuel_eia923\n",
"gen_fuel_923 = gen_fuel_923.merge(plants_ba, how=\"left\", on=\"plant_id_eia\")\n",
"\n",
- "gen_fuel_923.head()\n"
+ "gen_fuel_923.head()"
]
},
{
@@ -350,7 +350,7 @@
" gen_fuel_923.groupby(\"balancing_authority_code_eia\").sum()[\"net_generation_mwh\"]\n",
")\n",
"annual_923 = annual_923.rename(columns={\"net_generation_mwh\": \"EIA-923\"})\n",
- "annual_923.head()\n"
+ "annual_923.head()"
]
},
{
@@ -362,7 +362,7 @@
"source": [
"compare_annual_923_930 = annual_923.merge(\n",
" annual_930, how=\"outer\", left_index=True, right_index=True\n",
- ")\n"
+ ")"
]
},
{
@@ -373,7 +373,7 @@
"outputs": [],
"source": [
"# identify which BAs are missing from one or another dataset\n",
- "compare_annual_923_930[compare_annual_923_930.isna().any(axis=1)]\n"
+ "compare_annual_923_930[compare_annual_923_930.isna().any(axis=1)]"
]
},
{
@@ -384,7 +384,7 @@
"outputs": [],
"source": [
"# let's compare each BA\n",
- "px.scatter(compare_annual_923_930)\n"
+ "px.scatter(compare_annual_923_930)"
]
},
{
@@ -404,7 +404,7 @@
" percent_diff_923_930,\n",
" title=\"percent difference between emitting net generation in EIA-930 and EIA-923\",\n",
" labels={\"value\": \"% change from EIA-923\"},\n",
- ")\n"
+ ")"
]
},
{
@@ -430,7 +430,7 @@
"annual_923_by_fuel = annual_923_by_fuel.rename(\n",
" columns={\"net_generation_mwh\": \"EIA-923\"}\n",
")\n",
- "annual_923_by_fuel.head(3)\n"
+ "annual_923_by_fuel.head(3)"
]
},
{
@@ -457,7 +457,7 @@
"annual_930_by_fuel = annual_930_by_fuel.rename(\n",
" columns={\"Balancing Authority\": \"balancing_authority_code_eia\"}\n",
")\n",
- "annual_930_by_fuel\n"
+ "annual_930_by_fuel"
]
},
{
@@ -481,7 +481,7 @@
" facet_col_wrap=1,\n",
" height=1000,\n",
" title=\"comparison of net generation by fuel type for each BA\",\n",
- ")\n"
+ ")"
]
},
{
@@ -498,7 +498,7 @@
"percent_error[\"percent_error\"] = (\n",
" percent_error[\"EIA-930\"] - percent_error[\"EIA-923\"]\n",
") / percent_error[\"EIA-923\"]\n",
- "percent_error\n"
+ "percent_error"
]
},
{
@@ -548,7 +548,7 @@
"hourly_net_emissions = pd.read_csv(\n",
" \"../data/outputs/hourly_net_emission.csv\", index_col=0, parse_dates=True\n",
")\n",
- "hourly_emission_rate.head()\n"
+ "hourly_emission_rate.head()"
]
},
{
@@ -558,7 +558,7 @@
"metadata": {},
"outputs": [],
"source": [
- "eia_930.head()\n"
+ "eia_930.head()"
]
},
{
@@ -589,7 +589,7 @@
")\n",
"ax2.set_title(ba)\n",
"ax2.legend()\n",
- "ax2.set_xlim(parse_dt(\"2019-08-01\"), parse_dt(\"2019-08-10\"))\n"
+ "ax2.set_xlim(parse_dt(\"2019-08-01\"), parse_dt(\"2019-08-10\"))"
]
},
{
@@ -610,7 +610,7 @@
")\n",
"data_for_plot = data_for_plot.rename(columns={ba: \"CEMS\"})\n",
"\n",
- "px.line(data_for_plot, title=f\"Net generation in {ba} EIA-930 vs CEMS\")\n"
+ "px.line(data_for_plot, title=f\"Net generation in {ba} EIA-930 vs CEMS\")"
]
},
{
@@ -632,7 +632,7 @@
"source": [
"# For annual comparison graphs, see below with eGRID\n",
"annual_eia_930 = eia_930.groupby(\"Balancing Authority\").sum()[\"emitting_net_generation\"]\n",
- "annual_eia_930.head()\n"
+ "annual_eia_930.head()"
]
},
{
@@ -663,7 +663,7 @@
" AND report_date <= '{year}-12-30'\",\n",
" pudl_engine,\n",
")\n",
- "gen_923.head()\n"
+ "gen_923.head()"
]
},
{
@@ -682,7 +682,7 @@
"otherway = np.setdiff1d(\n",
" gen_fuel_923[\"plant_id_eia\"].unique(), gen_923[\"plant_id_eia\"].unique()\n",
")\n",
- "print(f\"{len(oneway)} plants in generation_fuel_eia923 are not in generation_eia923\")\n"
+ "print(f\"{len(oneway)} plants in generation_fuel_eia923 are not in generation_eia923\")"
]
},
{
@@ -731,7 +731,7 @@
" header=1,\n",
" index_col=\"BACODE\",\n",
")\n",
- "egrid.head()\n"
+ "egrid.head()"
]
},
{
@@ -753,7 +753,7 @@
" (egrid_data_code_to_name.loc[0, name], name)\n",
" for name in egrid_data_code_to_name.columns\n",
" ]\n",
- ")\n"
+ ")"
]
},
{
@@ -767,7 +767,7 @@
"annual_generation = hourly_net_generation.sum(axis=0).rename(\"hourly\")\n",
"annual_generation = egrid.merge(\n",
" annual_generation, how=\"right\", left_index=True, right_index=True\n",
- ").loc[:, [\"hourly\", \"BAGENACY\"]]\n"
+ ").loc[:, [\"hourly\", \"BAGENACY\"]]"
]
},
{
@@ -783,7 +783,7 @@
")\n",
"annual_generation = annual_generation.merge(\n",
" annual_923, how=\"left\", left_index=True, right_index=True\n",
- ")\n"
+ ")"
]
},
{
@@ -793,7 +793,7 @@
"metadata": {},
"outputs": [],
"source": [
- "annual_generation.head()\n"
+ "annual_generation.head()"
]
},
{
@@ -860,7 +860,7 @@
"ax3.hlines([1.0], -0.5, len(annual_generation) - 0.5, color=\"r\")\n",
"ax3.set_ylabel(\"Fraction of 930 generation captured by CEMS\")\n",
"ax3.set_ylim(0, 2)\n",
- "ax3.set_title(\"EIA-930 vs OGEI\")\n"
+ "ax3.set_title(\"EIA-930 vs OGEI\")"
]
},
{
@@ -872,7 +872,7 @@
"source": [
"# Many small BAs have terrible coverage of EIA-930 data, resulting in low annually aggregated 930 numbers.\n",
"# The worst discrepencies between 930 and eGRID:\n",
- "annual_generation.loc[[\"FPL\", \"IPCO\", \"NEVP\", \"SC\", \"TEC\", \"TVA\"], :]\n"
+ "annual_generation.loc[[\"FPL\", \"IPCO\", \"NEVP\", \"SC\", \"TEC\", \"TVA\"], :]"
]
},
{
@@ -900,7 +900,7 @@
"ax.set_xticklabels(labels=annual_generation.index, rotation=90)\n",
"ax.hlines([1.0], -0.5, len(annual_generation) - 0.5, color=\"r\")\n",
"ax.set_ylabel(\"Fraction of eGRID non-renewable generation captured by 923\")\n",
- "ax.set_title(\"923 vs eGRID\")\n"
+ "ax.set_title(\"923 vs eGRID\")"
]
},
{
diff --git a/notebooks/explore_data/explore_annually_reported_eia_data.ipynb b/notebooks/explore_data/explore_annually_reported_eia_data.ipynb
index af672e8c..b2b32a2c 100644
--- a/notebooks/explore_data/explore_annually_reported_eia_data.ipynb
+++ b/notebooks/explore_data/explore_annually_reported_eia_data.ipynb
@@ -14,7 +14,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"from column_checks import get_dtypes\n",
"import load_data\n",
@@ -39,11 +40,17 @@
"source": [
"pudl_out = load_data.initialize_pudl_out(year)\n",
"\n",
- "plant_frequency = pudl_out.plants_eia860()[[\"plant_id_eia\",\"respondent_frequency\"]]\n",
+ "plant_frequency = pudl_out.plants_eia860()[[\"plant_id_eia\", \"respondent_frequency\"]]\n",
"\n",
"# load the allocated EIA data\n",
- "eia923_allocated = pd.read_csv(f'{outputs_folder()}{path_prefix}/eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])\n",
- "eia923_allocated = eia923_allocated.merge(plant_frequency, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\")"
+ "eia923_allocated = pd.read_csv(\n",
+ " f\"{outputs_folder()}{path_prefix}/eia923_allocated_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")\n",
+ "eia923_allocated = eia923_allocated.merge(\n",
+ " plant_frequency, how=\"left\", on=\"plant_id_eia\", validate=\"m:1\"\n",
+ ")"
]
},
{
@@ -52,7 +59,15 @@
"metadata": {},
"outputs": [],
"source": [
- "data_from_annual = eia923_allocated.groupby([\"respondent_frequency\"], dropna=False)[[\"fuel_consumed_mmbtu\", \"net_generation_mwh\",\"co2_mass_lb\"]].sum() / eia923_allocated[[\"fuel_consumed_mmbtu\", \"net_generation_mwh\",\"co2_mass_lb\"]].sum() * 100\n",
+ "data_from_annual = (\n",
+ " eia923_allocated.groupby([\"respondent_frequency\"], dropna=False)[\n",
+ " [\"fuel_consumed_mmbtu\", \"net_generation_mwh\", \"co2_mass_lb\"]\n",
+ " ].sum()\n",
+ " / eia923_allocated[\n",
+ " [\"fuel_consumed_mmbtu\", \"net_generation_mwh\", \"co2_mass_lb\"]\n",
+ " ].sum()\n",
+ " * 100\n",
+ ")\n",
"data_from_annual.loc[\"Total Percent\"] = data_from_annual.sum()\n",
"data_from_annual"
]
@@ -63,7 +78,7 @@
"metadata": {},
"outputs": [],
"source": [
- "data_from_annual.loc[\"A\",:].rename(\"% of EIA-923 input data from EIA annual reporters\")"
+ "data_from_annual.loc[\"A\", :].rename(\"% of EIA-923 input data from EIA annual reporters\")"
]
},
{
@@ -80,7 +95,17 @@
"metadata": {},
"outputs": [],
"source": [
- "annual_eia_used = eia923_allocated[eia923_allocated[\"hourly_data_source\"] != \"cems\"].groupby([\"respondent_frequency\"], dropna=False)[[\"fuel_consumed_mmbtu\", \"net_generation_mwh\",\"co2_mass_lb\"]].sum() / eia923_allocated[[\"fuel_consumed_mmbtu\", \"net_generation_mwh\",\"co2_mass_lb\"]].sum() * 100\n",
+ "annual_eia_used = (\n",
+ " eia923_allocated[eia923_allocated[\"hourly_data_source\"] != \"cems\"]\n",
+ " .groupby([\"respondent_frequency\"], dropna=False)[\n",
+ " [\"fuel_consumed_mmbtu\", \"net_generation_mwh\", \"co2_mass_lb\"]\n",
+ " ]\n",
+ " .sum()\n",
+ " / eia923_allocated[\n",
+ " [\"fuel_consumed_mmbtu\", \"net_generation_mwh\", \"co2_mass_lb\"]\n",
+ " ].sum()\n",
+ " * 100\n",
+ ")\n",
"annual_eia_used.loc[\"Total Percent\"] = annual_eia_used.sum()\n",
"annual_eia_used"
]
@@ -91,7 +116,7 @@
"metadata": {},
"outputs": [],
"source": [
- "annual_eia_used.loc[\"A\",:].rename(\"% of output data from EIA annual reporters\")"
+ "annual_eia_used.loc[\"A\", :].rename(\"% of output data from EIA annual reporters\")"
]
},
{
@@ -108,15 +133,19 @@
"metadata": {},
"outputs": [],
"source": [
- "multi_source_subplants = eia923_allocated[\n",
- " [\"plant_id_eia\", \"subplant_id\", \"hourly_data_source\"]\n",
- "].drop_duplicates().drop(columns=\"hourly_data_source\")\n",
+ "multi_source_subplants = (\n",
+ " eia923_allocated[[\"plant_id_eia\", \"subplant_id\", \"hourly_data_source\"]]\n",
+ " .drop_duplicates()\n",
+ " .drop(columns=\"hourly_data_source\")\n",
+ ")\n",
"\n",
"multi_source_subplants = multi_source_subplants[\n",
- " multi_source_subplants.duplicated(\n",
- " subset=[\"plant_id_eia\", \"subplant_id\"])]\n",
+ " multi_source_subplants.duplicated(subset=[\"plant_id_eia\", \"subplant_id\"])\n",
+ "]\n",
"\n",
- "multi_source_subplants = eia923_allocated.merge(multi_source_subplants, how=\"inner\", on=[\"plant_id_eia\", \"subplant_id\"])\n"
+ "multi_source_subplants = eia923_allocated.merge(\n",
+ " multi_source_subplants, how=\"inner\", on=[\"plant_id_eia\", \"subplant_id\"]\n",
+ ")"
]
},
{
@@ -126,7 +155,15 @@
"outputs": [],
"source": [
"# what percent of the total EIA-923 data comes from subplants with annually-reported data and multiple sources?\n",
- "multi_source_summary = (multi_source_subplants.groupby([\"respondent_frequency\"], dropna=False)[[\"fuel_consumed_mmbtu\", \"net_generation_mwh\",\"co2_mass_lb\"]].sum() / eia923_allocated[[\"fuel_consumed_mmbtu\", \"net_generation_mwh\",\"co2_mass_lb\"]].sum() * 100)\n",
+ "multi_source_summary = (\n",
+ " multi_source_subplants.groupby([\"respondent_frequency\"], dropna=False)[\n",
+ " [\"fuel_consumed_mmbtu\", \"net_generation_mwh\", \"co2_mass_lb\"]\n",
+ " ].sum()\n",
+ " / eia923_allocated[\n",
+ " [\"fuel_consumed_mmbtu\", \"net_generation_mwh\", \"co2_mass_lb\"]\n",
+ " ].sum()\n",
+ " * 100\n",
+ ")\n",
"multi_source_summary.loc[\"Total Percent\"] = multi_source_summary.sum()\n",
"multi_source_summary"
]
@@ -137,7 +174,9 @@
"metadata": {},
"outputs": [],
"source": [
- "multi_source_summary.loc[\"A\",:].rename(\"% of output data mixing CEMS and annually-reported EIA data\")"
+ "multi_source_summary.loc[\"A\", :].rename(\n",
+ " \"% of output data mixing CEMS and annually-reported EIA data\"\n",
+ ")"
]
},
{
@@ -146,7 +185,26 @@
"metadata": {},
"outputs": [],
"source": [
- "pd.concat([pd.DataFrame(data_from_annual.loc[\"A\",:].rename(\"% of EIA-923 input data from EIA annual reporters\").round(2)).T, pd.DataFrame(annual_eia_used.loc[\"A\",:].rename(\"% of output data from EIA annual reporters\").round(2)).T, pd.DataFrame(multi_source_summary.loc[\"A\",:].rename(\"% of output data mixing CEMS and annually-reported EIA data\").round(2)).T], axis=0)"
+ "pd.concat(\n",
+ " [\n",
+ " pd.DataFrame(\n",
+ " data_from_annual.loc[\"A\", :]\n",
+ " .rename(\"% of EIA-923 input data from EIA annual reporters\")\n",
+ " .round(2)\n",
+ " ).T,\n",
+ " pd.DataFrame(\n",
+ " annual_eia_used.loc[\"A\", :]\n",
+ " .rename(\"% of output data from EIA annual reporters\")\n",
+ " .round(2)\n",
+ " ).T,\n",
+ " pd.DataFrame(\n",
+ " multi_source_summary.loc[\"A\", :]\n",
+ " .rename(\"% of output data mixing CEMS and annually-reported EIA data\")\n",
+ " .round(2)\n",
+ " ).T,\n",
+ " ],\n",
+ " axis=0,\n",
+ ")"
]
}
],
diff --git a/notebooks/explore_data/explore_intermediate_outputs.ipynb b/notebooks/explore_data/explore_intermediate_outputs.ipynb
index e76796cd..ac41bb30 100644
--- a/notebooks/explore_data/explore_intermediate_outputs.ipynb
+++ b/notebooks/explore_data/explore_intermediate_outputs.ipynb
@@ -14,7 +14,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"from column_checks import get_dtypes\n",
"from filepaths import *\n",
@@ -40,11 +41,30 @@
"year = 2020\n",
"path_prefix = f\"{year}/\"\n",
"\n",
- "cems = pd.read_csv(outputs_folder(f\"{path_prefix}/cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "partial_cems_plant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "partial_cems_subplant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "eia923_allocated = pd.read_csv(outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"), dtype=get_dtypes(), parse_dates=['report_date'])\n",
- "plant_attributes = pd.read_csv(outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())"
+ "cems = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/cems_subplant_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "partial_cems_plant = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "partial_cems_subplant = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "eia923_allocated = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")\n",
+ "plant_attributes = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ ")"
]
},
{
@@ -69,7 +89,7 @@
"source": [
"data = cems[cems[\"plant_id_eia\"] == 673]\n",
"\n",
- "data.groupby([\"plant_id_eia\",\"unitid\",\"report_date\"]).sum()"
+ "data.groupby([\"plant_id_eia\", \"unitid\", \"report_date\"]).sum()"
]
},
{
@@ -100,7 +120,9 @@
"\n",
"all_data = []\n",
"for ba in os.listdir(results_folder(f\"2021/power_sector_data/{resolution}/us_units\")):\n",
- " df = pd.read_csv(results_folder(f\"2021/power_sector_data/{resolution}/us_units/{ba}\"))\n",
+ " df = pd.read_csv(\n",
+ " results_folder(f\"2021/power_sector_data/{resolution}/us_units/{ba}\")\n",
+ " )\n",
" df[\"ba_code\"] = ba.split(\".\")[0]\n",
" all_data.append(df)\n",
"\n",
@@ -120,10 +142,13 @@
"all_data[\"nox_mass_lb_for_electricity\"] / all_data[\"net_generation_mwh\"]\n",
"all_data[\"so2_mass_lb_for_electricity\"] / all_data[\"net_generation_mwh\"]\n",
"\n",
- "for pol in [\"co2\",\"nox\",\"so2\"]:\n",
- " for fuel in [\"biomass\", \"geothermal\",\"waste\", \"other\"]:\n",
- " calc = all_data.loc[fuel,f\"{pol}_mass_lb_for_electricity\"] / all_data.loc[\"total\",f\"{pol}_mass_lb_for_electricity\"]\n",
- " print(f\"{pol} {fuel}: {calc}\")\n"
+ "for pol in [\"co2\", \"nox\", \"so2\"]:\n",
+ " for fuel in [\"biomass\", \"geothermal\", \"waste\", \"other\"]:\n",
+ " calc = (\n",
+ " all_data.loc[fuel, f\"{pol}_mass_lb_for_electricity\"]\n",
+ " / all_data.loc[\"total\", f\"{pol}_mass_lb_for_electricity\"]\n",
+ " )\n",
+ " print(f\"{pol} {fuel}: {calc}\")"
]
},
{
@@ -139,10 +164,21 @@
"metadata": {},
"outputs": [],
"source": [
- "all_data[\"pctdiff\"] = (all_data.generated_co2_rate_lb_per_mwh_for_electricity_adjusted - all_data.generated_co2_rate_lb_per_mwh_for_electricity) / all_data.generated_co2_rate_lb_per_mwh_for_electricity\n",
+ "all_data[\"pctdiff\"] = (\n",
+ " all_data.generated_co2_rate_lb_per_mwh_for_electricity_adjusted\n",
+ " - all_data.generated_co2_rate_lb_per_mwh_for_electricity\n",
+ ") / all_data.generated_co2_rate_lb_per_mwh_for_electricity\n",
"\n",
"\n",
- "all_data.loc[all_data[\"fuel_category\"] == \"total\", [\"ba_code\",\"pctdiff\",\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\",\"generated_co2_rate_lb_per_mwh_for_electricity\"]].sort_values(by=\"pctdiff\").head(25)"
+ "all_data.loc[\n",
+ " all_data[\"fuel_category\"] == \"total\",\n",
+ " [\n",
+ " \"ba_code\",\n",
+ " \"pctdiff\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " ],\n",
+ "].sort_values(by=\"pctdiff\").head(25)"
]
}
],
diff --git a/notebooks/explore_data/gens_not_in_cems.ipynb b/notebooks/explore_data/gens_not_in_cems.ipynb
index fbc76b6b..8b08bac8 100644
--- a/notebooks/explore_data/gens_not_in_cems.ipynb
+++ b/notebooks/explore_data/gens_not_in_cems.ipynb
@@ -17,7 +17,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import download_data\n",
"import load_data\n",
@@ -42,13 +43,32 @@
"outputs": [],
"source": [
"# load inputs to function\n",
- "cems = pd.read_csv(outputs_folder(f\"{path_prefix}/cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "partial_cems_plant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "partial_cems_subplant = pd.read_csv(outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"), dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "eia923_allocated = pd.read_csv(outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"), dtype=get_dtypes(), parse_dates=['report_date'])\n",
- "plant_attributes = pd.read_csv(outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())\n",
+ "cems = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/cems_subplant_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "partial_cems_plant = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/partial_cems_plant_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "partial_cems_subplant = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/partial_cems_subplant_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "eia923_allocated = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/eia923_allocated_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")\n",
+ "plant_attributes = pd.read_csv(\n",
+ " outputs_folder(f\"{path_prefix}/plant_static_attributes_{year}.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ ")\n",
"\n",
- "# select eia only data \n",
+ "# select eia only data\n",
"eia_only_data = eia923_allocated[\n",
" (eia923_allocated[\"hourly_data_source\"] == \"eia\")\n",
" & ~(eia923_allocated[\"fuel_consumed_mmbtu\"].isna())\n",
@@ -84,7 +104,7 @@
" how=\"left\",\n",
" on=\"plant_id_eia\",\n",
" validate=\"m:1\",\n",
- ")\n"
+ ")"
]
},
{
@@ -121,7 +141,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eia_caiso[\"nox_rate\"] = eia_caiso[\"nox_mass_lb_for_electricity\"] / eia_caiso[\"net_generation_mwh\"]\n",
+ "eia_caiso[\"nox_rate\"] = (\n",
+ " eia_caiso[\"nox_mass_lb_for_electricity\"] / eia_caiso[\"net_generation_mwh\"]\n",
+ ")\n",
"eia_caiso[\"nox_rate\"] = eia_caiso[\"nox_rate\"].replace(np.inf, np.nan)"
]
},
@@ -131,7 +153,12 @@
"metadata": {},
"outputs": [],
"source": [
- "eia_caiso.groupby([\"prime_mover_code\",\"energy_source_code\",])[\"nox_mass_lb_for_electricity\"].sum()"
+ "eia_caiso.groupby(\n",
+ " [\n",
+ " \"prime_mover_code\",\n",
+ " \"energy_source_code\",\n",
+ " ]\n",
+ ")[\"nox_mass_lb_for_electricity\"].sum()"
]
},
{
@@ -177,7 +204,7 @@
"subplant_nameplate = gross_to_net_generation.calculate_subplant_nameplate_capacity(year)\n",
"\n",
"pudl_out = load_data.initialize_pudl_out(year)\n",
- "gen_cap = pudl_out.gens_eia860()[[\"plant_id_eia\",\"generator_id\",\"capacity_mw\"]]"
+ "gen_cap = pudl_out.gens_eia860()[[\"plant_id_eia\", \"generator_id\", \"capacity_mw\"]]"
]
},
{
@@ -186,8 +213,12 @@
"metadata": {},
"outputs": [],
"source": [
- "eia_cf = eia_only_data.merge(gen_cap, how=\"left\", on=[\"plant_id_eia\",\"generator_id\"], validate=\"m:1\")\n",
- "eia_cf[\"capfac\"] = eia_cf.net_generation_mwh / (eia_cf.report_date.dt.days_in_month * 24 * eia_cf.capacity_mw)\n",
+ "eia_cf = eia_only_data.merge(\n",
+ " gen_cap, how=\"left\", on=[\"plant_id_eia\", \"generator_id\"], validate=\"m:1\"\n",
+ ")\n",
+ "eia_cf[\"capfac\"] = eia_cf.net_generation_mwh / (\n",
+ " eia_cf.report_date.dt.days_in_month * 24 * eia_cf.capacity_mw\n",
+ ")\n",
"eia_cf.loc[eia_cf[\"capfac\"] > 1.2, \"capfac\"] = np.NaN\n",
"eia_cf.loc[eia_cf[\"capfac\"] < 0, \"capfac\"] = np.NaN\n",
"eia_cf"
@@ -199,7 +230,9 @@
"metadata": {},
"outputs": [],
"source": [
- "px.histogram(eia_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(dtick=0.05)"
+ "px.histogram(eia_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(\n",
+ " dtick=0.05\n",
+ ")"
]
},
{
@@ -208,10 +241,12 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_cf = cems.merge(subplant_nameplate, how=\"left\", on=[\"plant_id_eia\",\"subplant_id\"])\n",
- "cems_cf = cems_cf.groupby([\"plant_id_eia\",\"subplant_id\"])[[\"net_generation_mwh\",\"capacity_mw\"]].sum()\n",
+ "cems_cf = cems.merge(subplant_nameplate, how=\"left\", on=[\"plant_id_eia\", \"subplant_id\"])\n",
+ "cems_cf = cems_cf.groupby([\"plant_id_eia\", \"subplant_id\"])[\n",
+ " [\"net_generation_mwh\", \"capacity_mw\"]\n",
+ "].sum()\n",
"cems_cf = cems_cf[cems_cf[\"capacity_mw\"] > 0]\n",
- "cems_cf['capfac'] = cems_cf['net_generation_mwh'] / cems_cf['capacity_mw']\n",
+ "cems_cf[\"capfac\"] = cems_cf[\"net_generation_mwh\"] / cems_cf[\"capacity_mw\"]\n",
"cems_cf.loc[cems_cf[\"capfac\"] > 1.2, \"capfac\"] = np.NaN\n",
"cems_cf.loc[cems_cf[\"capfac\"] < 0, \"capfac\"] = np.NaN\n",
"cems_cf"
@@ -223,7 +258,9 @@
"metadata": {},
"outputs": [],
"source": [
- "px.histogram(cems_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(dtick=0.05)"
+ "px.histogram(cems_cf, x=\"capfac\", nbins=15, histnorm=\"percent\", width=500).update_xaxes(\n",
+ " dtick=0.05\n",
+ ")"
]
}
],
diff --git a/notebooks/explore_data/manually_import_uncontrolled_emission_factors.ipynb b/notebooks/explore_data/manually_import_uncontrolled_emission_factors.ipynb
index 89945fde..c288b336 100644
--- a/notebooks/explore_data/manually_import_uncontrolled_emission_factors.ipynb
+++ b/notebooks/explore_data/manually_import_uncontrolled_emission_factors.ipynb
@@ -40,31 +40,31 @@
" https://catalystcoop-pudl.readthedocs.io/en/latest/data_dictionaries/codes_and_labels.html\n",
" \"\"\"\n",
" column_names = [\n",
- " 'readable_fuel_name',\n",
- " 'eia_fuel_code',\n",
- " 'source_and_tables',\n",
- " 'emissions_units',\n",
- " 'cyclone_firing_boiler',\n",
- " 'fluidized_bed_firing_boiler',\n",
- " 'stoker_boiler',\n",
- " 'tangential_firing_boiler',\n",
- " 'all_other_boiler_types',\n",
- " 'combustion_turbine',\n",
- " 'internal_combustion_engine'\n",
+ " \"readable_fuel_name\",\n",
+ " \"eia_fuel_code\",\n",
+ " \"source_and_tables\",\n",
+ " \"emissions_units\",\n",
+ " \"cyclone_firing_boiler\",\n",
+ " \"fluidized_bed_firing_boiler\",\n",
+ " \"stoker_boiler\",\n",
+ " \"tangential_firing_boiler\",\n",
+ " \"all_other_boiler_types\",\n",
+ " \"combustion_turbine\",\n",
+ " \"internal_combustion_engine\",\n",
" ]\n",
"\n",
" column_dtypes = {\n",
- " 'readable_fuel_name': 'str',\n",
- " 'eia_fuel_code': 'str',\n",
- " 'source_and_tables': 'str',\n",
- " 'emissions_units': 'str',\n",
+ " \"readable_fuel_name\": \"str\",\n",
+ " \"eia_fuel_code\": \"str\",\n",
+ " \"source_and_tables\": \"str\",\n",
+ " \"emissions_units\": \"str\",\n",
" }\n",
- " \n",
+ "\n",
" # Every other column is a float.\n",
" for column_name in column_names:\n",
" if column_name not in column_dtypes:\n",
- " column_dtypes[column_name] = 'float64'\n",
- " \n",
+ " column_dtypes[column_name] = \"float64\"\n",
+ "\n",
" # NOTE(milo): Header starts on 2 for this one!\n",
" df = pd.read_excel(\n",
" io=path_to_xlsx,\n",
@@ -89,35 +89,35 @@
"def load_nox_uncontrolled_efs(path_to_xlsx):\n",
" \"\"\"\n",
" https://catalystcoop-pudl.readthedocs.io/en/latest/data_dictionaries/codes_and_labels.html\n",
- " \"\"\" \n",
+ " \"\"\"\n",
" column_names = [\n",
- " 'readable_fuel_name',\n",
- " 'eia_fuel_code',\n",
- " 'source_and_tables',\n",
- " 'emissions_units',\n",
- " 'cyclone_firing_boiler',\n",
- " 'fluidized_bed_firing_boiler',\n",
- " 'stoker_boiler',\n",
- " 'tangential_firing_boiler_dry_bottom',\n",
- " 'tangential_firing_boiler_wet_bottom',\n",
- " 'all_other_boiler_types_dry_bottom',\n",
- " 'all_other_boiler_types_wet_bottom',\n",
- " 'combustion_turbine',\n",
- " 'internal_combustion_engine'\n",
+ " \"readable_fuel_name\",\n",
+ " \"eia_fuel_code\",\n",
+ " \"source_and_tables\",\n",
+ " \"emissions_units\",\n",
+ " \"cyclone_firing_boiler\",\n",
+ " \"fluidized_bed_firing_boiler\",\n",
+ " \"stoker_boiler\",\n",
+ " \"tangential_firing_boiler_dry_bottom\",\n",
+ " \"tangential_firing_boiler_wet_bottom\",\n",
+ " \"all_other_boiler_types_dry_bottom\",\n",
+ " \"all_other_boiler_types_wet_bottom\",\n",
+ " \"combustion_turbine\",\n",
+ " \"internal_combustion_engine\",\n",
" ]\n",
- " \n",
+ "\n",
" column_dtypes = {\n",
- " 'readable_fuel_name': 'str',\n",
- " 'eia_fuel_code': 'str',\n",
- " 'source_and_tables': 'str',\n",
- " 'emissions_units': 'str',\n",
+ " \"readable_fuel_name\": \"str\",\n",
+ " \"eia_fuel_code\": \"str\",\n",
+ " \"source_and_tables\": \"str\",\n",
+ " \"emissions_units\": \"str\",\n",
" }\n",
"\n",
" # Every other column is a float.\n",
" for column_name in column_names:\n",
" if column_name not in column_dtypes:\n",
- " column_dtypes[column_name] = 'float64'\n",
- " \n",
+ " column_dtypes[column_name] = \"float64\"\n",
+ "\n",
" # NOTE(milo): Header starts on 3 for this one!\n",
" df = pd.read_excel(\n",
" io=path_to_xlsx,\n",
@@ -139,9 +139,13 @@
"metadata": {},
"outputs": [],
"source": [
- "base_folder = filepaths.manual_folder('eia_electric_power_annual')\n",
- "so2_uncontrolled_efs_path = os.path.join(base_folder, 'epa_a_01_so2_uncontrolled_efs.xlsx')\n",
- "nox_uncontrolled_efs_path = os.path.join(base_folder, 'epa_a_02_nox_uncontrolled_efs.xlsx')"
+ "base_folder = filepaths.manual_folder(\"eia_electric_power_annual\")\n",
+ "so2_uncontrolled_efs_path = os.path.join(\n",
+ " base_folder, \"epa_a_01_so2_uncontrolled_efs.xlsx\"\n",
+ ")\n",
+ "nox_uncontrolled_efs_path = os.path.join(\n",
+ " base_folder, \"epa_a_02_nox_uncontrolled_efs.xlsx\"\n",
+ ")"
]
},
{
@@ -165,7 +169,7 @@
"def make_so2_emission_factor_rows(input_df):\n",
" \"\"\"\n",
" Convert the EIA emission factors excel format into the one we store in emission_factors_for_so2.csv.\n",
- " \n",
+ "\n",
" Columns:\n",
" prime_mover_code,\n",
" energy_source_code,\n",
@@ -176,73 +180,72 @@
" multiply_by_sulfur_content\n",
" \"\"\"\n",
" min_column_idx = 4\n",
- " \n",
+ "\n",
" map_eia_units_to_ours = {\n",
- " 'Lbs per MG': {\n",
- " 'emission_factor_numerator': 'lb',\n",
- " 'emission_factor_denominator': 'thousand gallons',\n",
+ " \"Lbs per MG\": {\n",
+ " \"emission_factor_numerator\": \"lb\",\n",
+ " \"emission_factor_denominator\": \"thousand gallons\",\n",
+ " },\n",
+ " \"Lbs per MMCF\": {\n",
+ " \"emission_factor_numerator\": \"lb\",\n",
+ " \"emission_factor_denominator\": \"Mcf\",\n",
" },\n",
- " 'Lbs per MMCF': {\n",
- " 'emission_factor_numerator': 'lb',\n",
- " 'emission_factor_denominator': 'Mcf',\n",
+ " \"Lbs per ton\": {\n",
+ " \"emission_factor_numerator\": \"lb\",\n",
+ " \"emission_factor_denominator\": \"short ton\",\n",
" },\n",
- " 'Lbs per ton': {\n",
- " 'emission_factor_numerator': 'lb',\n",
- " 'emission_factor_denominator': 'short ton',\n",
- " }\n",
" }\n",
- " \n",
+ "\n",
" map_eia_combustion_system_to_ours = {\n",
- " 'cyclone_firing_boiler' : {\n",
- " 'prime_mover_code': TODO\n",
- " 'boiler_bottom_type': 'N/A',\n",
- " 'boiler_firing_type': 'STOKER',\n",
- " 'multiple_by_sulfur_content': 0,\n",
+ " \"cyclone_firing_boiler\": {\n",
+ " \"prime_mover_code\": \"TODO\",\n",
+ " \"boiler_bottom_type\": \"N/A\",\n",
+ " \"boiler_firing_type\": \"STOKER\",\n",
+ "        \"multiply_by_sulfur_content\": 0,\n",
" },\n",
- " 'fluidized_bed_firing_boiler' : {\n",
- " 'prime_mover_code': TODO\n",
- " 'boiler_bottom_type': 'N/A',\n",
- " 'boiler_firing_type': 'FLUIDIZED',\n",
- " 'multiple_by_sulfur_content': 0,\n",
+ " \"fluidized_bed_firing_boiler\": {\n",
+ " \"prime_mover_code\": \"TODO\",\n",
+ " \"boiler_bottom_type\": \"N/A\",\n",
+ " \"boiler_firing_type\": \"FLUIDIZED\",\n",
+ "        \"multiply_by_sulfur_content\": 0,\n",
" },\n",
- " 'stoker_boiler' : {\n",
- " 'prime_mover_code': 'ST' # Steam\n",
- " 'boiler_bottom_type': 'N/A',\n",
- " 'boiler_firing_type': 'STOKER',\n",
- " 'multiple_by_sulfur_content': 0,\n",
+ " \"stoker_boiler\": {\n",
+ " \"prime_mover_code\": \"ST\", # Steam\n",
+ " \"boiler_bottom_type\": \"N/A\",\n",
+ " \"boiler_firing_type\": \"STOKER\",\n",
+ "        \"multiply_by_sulfur_content\": 0,\n",
" },\n",
- " 'tangential_firing_boiler' : {\n",
- " 'prime_mover_code': 'ST' # Steam\n",
- " 'boiler_bottom_type': 'N/A',\n",
- " 'boiler_firing_type': 'TANGENTIAL',\n",
- " 'multiple_by_sulfur_content': 0,\n",
+ " \"tangential_firing_boiler\": {\n",
+ " \"prime_mover_code\": \"ST\", # Steam\n",
+ " \"boiler_bottom_type\": \"N/A\",\n",
+ " \"boiler_firing_type\": \"TANGENTIAL\",\n",
+ "        \"multiply_by_sulfur_content\": 0,\n",
" },\n",
- " 'all_other_boiler_types' : {\n",
- " 'prime_mover_code': 'UNK',\n",
- " 'boiler_firing_type': 'N/A',\n",
- " 'multiply_by_sulfur_content': 0 \n",
+ " \"all_other_boiler_types\": {\n",
+ " \"prime_mover_code\": \"UNK\",\n",
+ " \"boiler_firing_type\": \"N/A\",\n",
+ " \"multiply_by_sulfur_content\": 0,\n",
" },\n",
- " 'combustion_turbine' : {\n",
- " 'prime_mover_code': 'GT', # Gas combustion turbine.\n",
- " 'boiler_firing_type': 'N/A',\n",
- " 'multiply_by_sulfur_content': 0\n",
+ " \"combustion_turbine\": {\n",
+ " \"prime_mover_code\": \"GT\", # Gas combustion turbine.\n",
+ " \"boiler_firing_type\": \"N/A\",\n",
+ " \"multiply_by_sulfur_content\": 0,\n",
+ " },\n",
+ " \"internal_combustion_engine\": {\n",
+ " \"prime_mover_code\": \"IC\",\n",
+ " \"boiler_firing_type\": \"N/A\",\n",
+ " \"multiply_by_sulfur_content\": 0,\n",
" },\n",
- " 'internal_combustion_engine' : {\n",
- " 'prime_mover_code': 'IC',\n",
- " 'boiler_firing_type': 'N/A',\n",
- " 'multiply_by_sulfur_content': 0\n",
- " }\n",
" }\n",
- " \n",
+ "\n",
" for i in range(len(input_df)):\n",
" row = input_df.iloc[i]\n",
- " units = row['emissions_units'].replace(' **', '') # Remove asterisks.\n",
+ " units = row[\"emissions_units\"].replace(\" **\", \"\") # Remove asterisks.\n",
" print(units)\n",
" mapped_units_dict = map_eia_units_to_ours[units]\n",
" for colname in input_df.columns[min_column_idx:]:\n",
" emission_factor = row[colname]\n",
- " print(colname, ':', emission_factor)\n",
- " "
+ " print(colname, \":\", emission_factor)"
]
},
{
@@ -266,7 +269,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3.10.4 ('open_grid_emissions')",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -280,7 +283,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9 | packaged by conda-forge | (main, Jan 11 2023, 15:15:40) [MSC v.1916 64 bit (AMD64)]"
+ "version": "3.10.4"
},
"vscode": {
"interpreter": {
diff --git a/notebooks/explore_methods/calculate_residual_net_generation.ipynb b/notebooks/explore_methods/calculate_residual_net_generation.ipynb
index a7d3e415..f0373380 100644
--- a/notebooks/explore_methods/calculate_residual_net_generation.ipynb
+++ b/notebooks/explore_methods/calculate_residual_net_generation.ipynb
@@ -1,835 +1,841 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Cleaning of 930, analyze how cleaning affects residual profile"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# data source https://gridemissions.s3.us-east-2.amazonaws.com/EBA_elec.csv.gz\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "\n",
- "import plotly.express as px\n",
- "import plotly.graph_objects as go\n",
- "\n",
- "import datetime as dt\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%reload_ext autoreload\n",
- "%autoreload 2\n",
- "\n",
- "# Tell python where to look for modules. \n",
- "# Depending on how your jupyter handles working directories, this may not be needed.\n",
- "import sys\n",
- "sys.path.append('../../open-grid-emissions/')\n",
- "\n",
- "from src.visualization import day_hour_heatmap\n",
- "from src.eia930 import fuel_code_map, reformat_chalendar, load_chalendar, load_chalendar_for_pipeline\n",
- "from src.download_data import download_chalendar_files\n",
- "from src.data_cleaning import distribute_monthly_eia_data_to_hourly\n",
- "from src.impute_hourly_profiles import aggregate_for_residual, calculate_residual"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "year = 2020\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Download data if not exists\n",
- "download_chalendar_files()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "original = load_chalendar_for_pipeline(\n",
- " \"../data/eia930/chalendar/EBA_adjusted_rolling.csv\", year=year\n",
- ")\n",
- "original.head(5)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cleaned = load_chalendar_for_pipeline(\n",
- " \"../data/eia930/chalendar/EBA_adjusted_elec.csv\", year=year\n",
- ")\n",
- "cleaned.head(5)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Name column same as CEMS. TODO: make eia930 output use this name\n",
- "cleaned = cleaned.rename(columns={\"datetime_utc\": \"datetime_utc\"})\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# load hourly CEMS data outputted from main data pipeline\n",
- "cems = pd.read_csv(\n",
- " f\"../data/outputs/cems_{year}.csv\",\n",
- " parse_dates=[\"datetime_utc\"],\n",
- ")\n",
- "cems.head(5)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Aggregate CEMS data and merge with EIA-930 data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# # combine original and cleaned EIA-930 data\n",
- "# combined_data = cleaned.merge(\n",
- "# original[[\"ba_code\", \"fuel_category\", \"datetime_utc\", \"net_generation_mwh_930\"]],\n",
- "# how=\"left\",\n",
- "# on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"],\n",
- "# suffixes=(\"_clean\", \"_orig\"),\n",
- "# )\n",
- "# combined_data\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Load plant attributes (including BA codes)\n",
- "plant_attributes = pd.read_csv(\"../data/outputs/plant_static_attributes.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plant_attributes.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cems = cems.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "aggregate_for_residual(cems, \"datetime_utc\", \"ba_code_physical\", transmission=True)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# perform different groupby operations so that we can compare different ways of aggregating the cems data\n",
- "\n",
- "# aggregate all generation by commercial BA\n",
- "cems_bac_all = (\n",
- " cems.groupby([\"ba_code\", \"fuel_category_eia930\", \"datetime_utc\"])[\n",
- " \"net_generation_mwh\"\n",
- " ]\n",
- " .sum()\n",
- " .reset_index()\n",
- " .rename(\n",
- " columns={\n",
- " \"fuel_category_eia930\": \"fuel_category\",\n",
- " \"net_generation_mwh\": \"net_generation_mwh_bac_all\",\n",
- " }\n",
- " )\n",
- ")\n",
- "\n",
- "# aggregate all generation by physical BA\n",
- "cems_bap_all = (\n",
- " cems.groupby([\"ba_code_physical\", \"fuel_category_eia930\", \"datetime_utc\"])[\n",
- " \"net_generation_mwh\"\n",
- " ]\n",
- " .sum()\n",
- " .reset_index()\n",
- " .rename(\n",
- " columns={\n",
- " \"fuel_category_eia930\": \"fuel_category\",\n",
- " \"ba_code_physical\": \"ba_code\",\n",
- " \"net_generation_mwh\": \"net_generation_mwh_bap_all\",\n",
- " }\n",
- " )\n",
- ")\n",
- "\n",
- "# Aggregate transmission-connected generation by commercial BA\n",
- "cems_bac_trans = (\n",
- " cems[cems[\"distribution_flag\"] is False]\n",
- " .groupby([\"ba_code\", \"fuel_category_eia930\", \"datetime_utc\"])[\"net_generation_mwh\"]\n",
- " .sum()\n",
- " .reset_index()\n",
- " .rename(\n",
- " columns={\n",
- " \"fuel_category_eia930\": \"fuel_category\",\n",
- " \"net_generation_mwh\": \"net_generation_mwh_bac_trans\",\n",
- " }\n",
- " )\n",
- ")\n",
- "\n",
- "# Aggregate transmission-connected generation by physical BA\n",
- "cems_bap_trans = (\n",
- " cems[cems[\"distribution_flag\"] == False]\n",
- " .groupby([\"ba_code_physical\", \"fuel_category_eia930\", \"datetime_utc\"])[\n",
- " \"net_generation_mwh\"\n",
- " ]\n",
- " .sum()\n",
- " .reset_index()\n",
- " .rename(\n",
- " columns={\n",
- " \"fuel_category_eia930\": \"fuel_category\",\n",
- " \"ba_code_physical\": \"ba_code\",\n",
- " \"net_generation_mwh\": \"net_generation_mwh_bap_trans\",\n",
- " }\n",
- " )\n",
- ")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# merge the aggregated data into the EIA data\n",
- "combined_data = cleaned.merge(\n",
- " cems_bac_all, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
- ").fillna(0)\n",
- "combined_data = combined_data.merge(\n",
- " cems_bap_all, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
- ").fillna(0)\n",
- "combined_data = combined_data.merge(\n",
- " cems_bac_trans, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
- ").fillna(0)\n",
- "combined_data = combined_data.merge(\n",
- " cems_bap_trans, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
- ").fillna(0)\n",
- "combined_data\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cleaned\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# only keep rows where local datetime is in the current year\n",
- "combined_data = combined_data[\n",
- " combined_data[\"datetime_local\"].apply(lambda x: x.year) == year\n",
- "]\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Evaluate BA mappings\n",
- "\n",
- "Mapping options: \n",
- "Physical or commercial BA; include or exclude distribution-connected generation \n",
- "\n",
- "Metric: 923 data aggregated to BA should be close to 930 data aggregated to month. \n",
- "For each BA, which aggregation metric minimizes difference? \n",
- "How different are the aggreagtion metrics? "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia923 = pd.read_csv(f\"../data/outputs/eia923_allocated_{year}.csv\")\n",
- "eia923.report_date = pd.to_datetime(\n",
- " eia923.report_date\n",
- ") # TODO why is this not a date already?\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia923 = eia923.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia930_agg = (\n",
- " cleaned.groupby([\"ba_code\", \"fuel_category\", \"report_date\"]).sum().reset_index()\n",
- ")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia923.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia930_agg.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia923_agg = eia930_agg.copy()\n",
- "# aggregate all generation by commercial BA\n",
- "for ba_key in [\"ba_code\", \"ba_code_physical\"]:\n",
- " for transmission_only in [\"transmission\", \"all\"]:\n",
- " transmission_key = transmission_only == \"transmission\"\n",
- " aggregated_gen = aggregate_for_residual(\n",
- " eia923, time_key=\"report_date\", ba_key=ba_key, transmission=transmission_key\n",
- " )\n",
- " aggregated_gen.rename(\n",
- " columns={\"net_generation_mwh\": f\"mwh_{ba_key}_{transmission_only}\"},\n",
- " inplace=True,\n",
- " )\n",
- " aggregated_gen[f\"difference_{ba_key}_{transmission_only}\"] = (\n",
- " eia923_agg[\"net_generation_mwh_930\"]\n",
- " - aggregated_gen[f\"mwh_{ba_key}_{transmission_only}\"]\n",
- " )\n",
- " eia923_agg = eia923_agg.merge(\n",
- " aggregated_gen, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"report_date\"]\n",
- " )\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "eia923_agg.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.histogram(\n",
- " eia923_agg,\n",
- " x=[\n",
- " \"difference_ba_code_transmission\",\n",
- " \"difference_ba_code_all\",\n",
- " \"difference_ba_code_physical_transmission\",\n",
- " \"difference_ba_code_physical_all\",\n",
- " ],\n",
- ")\n",
- "\n",
- "# Overlay both histograms\n",
- "fig.update_layout(barmode=\"overlay\")\n",
- "# Reduce opacity to see both histograms\n",
- "fig.update_traces(opacity=0.25)\n",
- "fig.show()\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Visualize net generation data from each source in a single BA\n",
- "Only visualize non-renewable data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "ba = \"MISO\"\n",
- "\n",
- "data_to_visualize = combined_data[\n",
- " (combined_data[\"ba_code\"] == ba)\n",
- " & (~combined_data[\"fuel_category\"].isin([\"hydro\", \"solar\", \"wind\"]))\n",
- "]\n",
- "\n",
- "px.line(\n",
- " data_to_visualize,\n",
- " x=\"datetime_local\",\n",
- " y=[\n",
- " \"net_generation_mwh_930_clean\",\n",
- " \"net_generation_mwh_930_orig\",\n",
- " \"net_generation_mwh_bac_all\",\n",
- " \"net_generation_mwh_bap_all\",\n",
- " \"net_generation_mwh_bac_trans\",\n",
- " \"net_generation_mwh_bap_trans\",\n",
- " ],\n",
- " facet_col=\"fuel_category\",\n",
- " height=1000,\n",
- " facet_col_wrap=1,\n",
- ").update_yaxes(matches=None)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Calculate the residual based on a single CEMS aggregation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# for now, let's pick a single cems aggregation to use to calculate a residual\n",
- "cems_data_column = \"net_generation_mwh_bac_all\"\n",
- "\n",
- "combined_data[\"residual\"] = (\n",
- " combined_data[\"net_generation_mwh_930_clean\"] - combined_data[cems_data_column]\n",
- ")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Visualize residual for the BA"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "data_to_visualize = combined_data[\n",
- " (combined_data[\"ba_code\"] == ba)\n",
- " & (~combined_data[\"fuel_category\"].isin([\"hydro\", \"solar\", \"wind\"]))\n",
- "]\n",
- "\n",
- "px.line(\n",
- " data_to_visualize,\n",
- " x=\"datetime_local\",\n",
- " y=[\"net_generation_mwh_930_clean\", cems_data_column, \"residual\"],\n",
- " facet_col=\"fuel_category\",\n",
- " height=1000,\n",
- " facet_col_wrap=1,\n",
- ").update_yaxes(matches=None)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Test Scaling Strategy\n",
- "If the residual is ever negative, we want to scale the cems net generation data to always be less than or equal to the 930 net generation. \n",
- "\n",
- "To do this, we'll try scaling the data as a percentage:\n",
- "1. For each hour, calculate the ratio between 930 NG and CEMS NG.\n",
- "2. For each BA-fuel, find the minimum ratio. If the minimum ratio is >= 1, it means that 930 is always greater than CEMS and doesn't need to be scaled. For any BA-fuels where the ratio is < 1, we will use this as a scaling factor to scale the CEMS data such that the scaled data is always <= the 930 data\n",
- "3. Multiply all hourly CEMS values by the scaling factor"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# only keep data where the cems data is greater than zero\n",
- "scaling_factors = combined_data.copy()[combined_data[cems_data_column] != 0]\n",
- "\n",
- "# calculate the ratio of 930 net generation to cems net generation\n",
- "# if correct, ratio should be >=1\n",
- "scaling_factors[\"scaling_factor\"] = (\n",
- " scaling_factors[\"net_generation_mwh_930_clean\"] / scaling_factors[cems_data_column]\n",
- ")\n",
- "\n",
- "# find the minimum ratio for each ba-fuel\n",
- "scaling_factors = (\n",
- " scaling_factors.groupby([\"ba_code\", \"fuel_category\"])[\"scaling_factor\"]\n",
- " .min()\n",
- " .reset_index()\n",
- ")\n",
- "scaling_factors\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# merge the scaling factor into the combined data\n",
- "# for any BA-fuels without a scaling factor, fill with 1 (scale to 100% of the origina data)\n",
- "combined_data = combined_data.merge(\n",
- " scaling_factors, how=\"left\", on=[\"ba_code\", \"fuel_category\"]\n",
- ").fillna(1)\n",
- "\n",
- "# calculate the scaled cems data\n",
- "combined_data[\"cems_scaled\"] = (\n",
- " combined_data[cems_data_column] * combined_data[\"scaling_factor\"]\n",
- ")\n",
- "\n",
- "# calculate a scaled residual\n",
- "combined_data[\"residual_scaled\"] = (\n",
- " combined_data[\"net_generation_mwh_930_clean\"] - combined_data[\"cems_scaled\"]\n",
- ")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Plot scaled residuals"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "ba = \"PJM\"\n",
- "\n",
- "data_to_visualize = combined_data[\n",
- " (combined_data[\"ba_code\"] == ba)\n",
- " & (~combined_data[\"fuel_category\"].isin([\"hydro\", \"solar\", \"wind\"]))\n",
- "]\n",
- "\n",
- "px.line(\n",
- " data_to_visualize,\n",
- " x=\"datetime_local\",\n",
- " y=[\n",
- " \"net_generation_mwh_930_clean\",\n",
- " cems_data_column,\n",
- " \"cems_scaled\",\n",
- " \"residual\",\n",
- " \"residual_scaled\",\n",
- " ],\n",
- " facet_col=\"fuel_category\",\n",
- " height=1000,\n",
- " facet_col_wrap=1,\n",
- ").update_yaxes(matches=None)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "combined_data = combined_data.reset_index()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# broken\n",
- "\n",
- "ba = \"MISO\"\n",
- "fuel = \"natural_gas\"\n",
- "\n",
- "data_to_visualize = combined_data.copy()[\n",
- " (combined_data[\"ba_code\"] == ba) & (combined_data[\"fuel_category\"] == fuel)\n",
- "]\n",
- "data_to_visualize[\"datetime_local\"] = pd.to_datetime(\n",
- " data_to_visualize[\"datetime_local\"]\n",
- ")\n",
- "data_to_visualize[\"date\"] = data_to_visualize[\"datetime_local\"].dt.date\n",
- "data_to_visualize[\"hour\"] = data_to_visualize[\"datetime_local\"].dt.hour\n",
- "\n",
- "# data_to_visualize = data_to_visualize.pivot(index='hour', columns='date', values='residual_scaled')\n",
- "\n",
- "# px.imshow(data_to_visualize, color_continuous_scale=\"RdBu\", width=1000, height=400, color_continuous_midpoint=0,)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Export the profile"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "data_to_export = combined_data[\n",
- " [\n",
- " \"ba_code\",\n",
- " \"fuel_category\",\n",
- " \"datetime_utc\",\n",
- " \"datetime_local\",\n",
- " \"report_date\",\n",
- " \"residual_scaled\",\n",
- " ]\n",
- "]\n",
- "data_to_export.to_csv(\"../data/output/residual_profiles.csv\", index=False)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Evaluate profile quality\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "with open(\"../data/outputs/residual_profiles.csv\") as f:\n",
- " line = f.readline()\n",
- " print(line.split(\",\"))\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cems.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Produced by data_pipeline\n",
- "eia = pd.read_csv(\"../data/output/eia923_for_residual.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Produced by plant_class_differences\n",
- "# TODO use output plant data to find smallest plants after cleaning -- some of these are in \"no cems\" data categories\n",
- "validation_plants = pd.read_csv(\"../data/output/validation_plants.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "to_distribute = eia[eia.plant_id_eia.isin(validation_plants.plant_id_eia)]\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "to_distribute.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "distributed = distribute_monthly_eia_data_to_hourly(\n",
- " to_distribute, combined_data, \"residual_scaled\"\n",
- ")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cleaned = load_chalendar_for_pipeline(\n",
- " \"../data/eia930/chalendar/EBA_adjusted_elec.csv\", year=year\n",
- ")\n",
- "cems = pd.read_csv(\n",
- " f\"../data/outputs/cems_{year}.csv\",\n",
- " parse_dates=[\"datetime_utc\"],\n",
- ")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cleaned = cleaned.rename(columns={\"datetime_utc\": \"datetime_utc\"})\n",
- "cems = cems.rename(columns={\"datetime_utc\": \"datetime_utc\"})\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plant_attributes = pd.read_csv(\"../data/outputs/plant_static_attributes.csv\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cleaned.head()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "calculate_residual(cems, cleaned, plant_attributes, 2020)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cems\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.10.5 ('hourly_egrid')",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.5"
- },
- "orig_nbformat": 4,
- "vscode": {
- "interpreter": {
- "hash": "4103f3cd497821eca917ea303dbe10c590d787eb7d2dc3fd4e15dec0356e7931"
- }
- }
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Cleaning of 930, analyze how cleaning affects residual profile"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# data source https://gridemissions.s3.us-east-2.amazonaws.com/EBA_elec.csv.gz\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go\n",
+ "\n",
+ "import datetime as dt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "# Tell python where to look for modules.\n",
+ "# Depending on how your jupyter handles working directories, this may not be needed.\n",
+ "import sys\n",
+ "\n",
+ "sys.path.append(\"../../open-grid-emissions/\")\n",
+ "\n",
+ "from src.visualization import day_hour_heatmap\n",
+ "from src.eia930 import (\n",
+ " fuel_code_map,\n",
+ " reformat_chalendar,\n",
+ " load_chalendar,\n",
+ " load_chalendar_for_pipeline,\n",
+ ")\n",
+ "from src.download_data import download_chalendar_files\n",
+ "from src.data_cleaning import distribute_monthly_eia_data_to_hourly\n",
+ "from src.impute_hourly_profiles import aggregate_for_residual, calculate_residual"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "year = 2020"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Download data if not exists\n",
+ "download_chalendar_files()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "original = load_chalendar_for_pipeline(\n",
+ " \"../data/eia930/chalendar/EBA_adjusted_rolling.csv\", year=year\n",
+ ")\n",
+ "original.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleaned = load_chalendar_for_pipeline(\n",
+ " \"../data/eia930/chalendar/EBA_adjusted_elec.csv\", year=year\n",
+ ")\n",
+ "cleaned.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Name column same as CEMS. TODO: make eia930 output use this name\n",
+ "cleaned = cleaned.rename(columns={\"datetime_utc\": \"datetime_utc\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# load hourly CEMS data outputted from main data pipeline\n",
+ "cems = pd.read_csv(\n",
+ " f\"../data/outputs/cems_{year}.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")\n",
+ "cems.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Aggregate CEMS data and merge with EIA-930 data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# # combine original and cleaned EIA-930 data\n",
+ "# combined_data = cleaned.merge(\n",
+ "# original[[\"ba_code\", \"fuel_category\", \"datetime_utc\", \"net_generation_mwh_930\"]],\n",
+ "# how=\"left\",\n",
+ "# on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"],\n",
+ "# suffixes=(\"_clean\", \"_orig\"),\n",
+ "# )\n",
+ "# combined_data\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load plant attributes (including BA codes)\n",
+ "plant_attributes = pd.read_csv(\"../data/outputs/plant_static_attributes.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plant_attributes.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cems = cems.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "aggregate_for_residual(cems, \"datetime_utc\", \"ba_code_physical\", transmission=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# perform different groupby operations so that we can compare different ways of aggregating the cems data\n",
+ "\n",
+ "# aggregate all generation by commercial BA\n",
+ "cems_bac_all = (\n",
+ " cems.groupby([\"ba_code\", \"fuel_category_eia930\", \"datetime_utc\"])[\n",
+ " \"net_generation_mwh\"\n",
+ " ]\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ " .rename(\n",
+ " columns={\n",
+ " \"fuel_category_eia930\": \"fuel_category\",\n",
+ " \"net_generation_mwh\": \"net_generation_mwh_bac_all\",\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# aggregate all generation by physical BA\n",
+ "cems_bap_all = (\n",
+ " cems.groupby([\"ba_code_physical\", \"fuel_category_eia930\", \"datetime_utc\"])[\n",
+ " \"net_generation_mwh\"\n",
+ " ]\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ " .rename(\n",
+ " columns={\n",
+ " \"fuel_category_eia930\": \"fuel_category\",\n",
+ " \"ba_code_physical\": \"ba_code\",\n",
+ " \"net_generation_mwh\": \"net_generation_mwh_bap_all\",\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Aggregate transmission-connected generation by commercial BA\n",
+ "cems_bac_trans = (\n",
+ " cems[cems[\"distribution_flag\"] is False]\n",
+ " .groupby([\"ba_code\", \"fuel_category_eia930\", \"datetime_utc\"])[\"net_generation_mwh\"]\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ " .rename(\n",
+ " columns={\n",
+ " \"fuel_category_eia930\": \"fuel_category\",\n",
+ " \"net_generation_mwh\": \"net_generation_mwh_bac_trans\",\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Aggregate transmission-connected generation by physical BA\n",
+ "cems_bap_trans = (\n",
+ " cems[cems[\"distribution_flag\"] == False]\n",
+ " .groupby([\"ba_code_physical\", \"fuel_category_eia930\", \"datetime_utc\"])[\n",
+ " \"net_generation_mwh\"\n",
+ " ]\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ " .rename(\n",
+ " columns={\n",
+ " \"fuel_category_eia930\": \"fuel_category\",\n",
+ " \"ba_code_physical\": \"ba_code\",\n",
+ " \"net_generation_mwh\": \"net_generation_mwh_bap_trans\",\n",
+ " }\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# merge the aggregated data into the EIA data\n",
+ "combined_data = cleaned.merge(\n",
+ " cems_bac_all, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
+ ").fillna(0)\n",
+ "combined_data = combined_data.merge(\n",
+ " cems_bap_all, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
+ ").fillna(0)\n",
+ "combined_data = combined_data.merge(\n",
+ " cems_bac_trans, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
+ ").fillna(0)\n",
+ "combined_data = combined_data.merge(\n",
+ " cems_bap_trans, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"datetime_utc\"]\n",
+ ").fillna(0)\n",
+ "combined_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleaned"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# only keep rows where local datetime is in the current year\n",
+ "combined_data = combined_data[\n",
+ " combined_data[\"datetime_local\"].apply(lambda x: x.year) == year\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Evaluate BA mappings\n",
+ "\n",
+ "Mapping options: \n",
+ "Physical or commercial BA; include or exclude distribution-connected generation \n",
+ "\n",
+ "Metric: 923 data aggregated to BA should be close to 930 data aggregated to month. \n",
+ "For each BA, which aggregation metric minimizes difference? \n",
+ "How different are the aggregation metrics? "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia923 = pd.read_csv(f\"../data/outputs/eia923_allocated_{year}.csv\")\n",
+ "eia923.report_date = pd.to_datetime(\n",
+ " eia923.report_date\n",
+ ") # TODO why is this not a date already?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia923 = eia923.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia930_agg = (\n",
+ " cleaned.groupby([\"ba_code\", \"fuel_category\", \"report_date\"]).sum().reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia923.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia930_agg.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia923_agg = eia930_agg.copy()\n",
+ "# aggregate all generation by commercial BA\n",
+ "for ba_key in [\"ba_code\", \"ba_code_physical\"]:\n",
+ " for transmission_only in [\"transmission\", \"all\"]:\n",
+ " transmission_key = transmission_only == \"transmission\"\n",
+ " aggregated_gen = aggregate_for_residual(\n",
+ " eia923, time_key=\"report_date\", ba_key=ba_key, transmission=transmission_key\n",
+ " )\n",
+ " aggregated_gen.rename(\n",
+ " columns={\"net_generation_mwh\": f\"mwh_{ba_key}_{transmission_only}\"},\n",
+ " inplace=True,\n",
+ " )\n",
+ " aggregated_gen[f\"difference_{ba_key}_{transmission_only}\"] = (\n",
+ " eia923_agg[\"net_generation_mwh_930\"]\n",
+ " - aggregated_gen[f\"mwh_{ba_key}_{transmission_only}\"]\n",
+ " )\n",
+ " eia923_agg = eia923_agg.merge(\n",
+ " aggregated_gen, how=\"left\", on=[\"ba_code\", \"fuel_category\", \"report_date\"]\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "eia923_agg.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fig = px.histogram(\n",
+ " eia923_agg,\n",
+ " x=[\n",
+ " \"difference_ba_code_transmission\",\n",
+ " \"difference_ba_code_all\",\n",
+ " \"difference_ba_code_physical_transmission\",\n",
+ " \"difference_ba_code_physical_all\",\n",
+ " ],\n",
+ ")\n",
+ "\n",
+ "# Overlay both histograms\n",
+ "fig.update_layout(barmode=\"overlay\")\n",
+ "# Reduce opacity to see both histograms\n",
+ "fig.update_traces(opacity=0.25)\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Visualize net generation data from each source in a single BA\n",
+ "Only visualize non-renewable data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ba = \"MISO\"\n",
+ "\n",
+ "data_to_visualize = combined_data[\n",
+ " (combined_data[\"ba_code\"] == ba)\n",
+ " & (~combined_data[\"fuel_category\"].isin([\"hydro\", \"solar\", \"wind\"]))\n",
+ "]\n",
+ "\n",
+ "px.line(\n",
+ " data_to_visualize,\n",
+ " x=\"datetime_local\",\n",
+ " y=[\n",
+ " \"net_generation_mwh_930_clean\",\n",
+ " \"net_generation_mwh_930_orig\",\n",
+ " \"net_generation_mwh_bac_all\",\n",
+ " \"net_generation_mwh_bap_all\",\n",
+ " \"net_generation_mwh_bac_trans\",\n",
+ " \"net_generation_mwh_bap_trans\",\n",
+ " ],\n",
+ " facet_col=\"fuel_category\",\n",
+ " height=1000,\n",
+ " facet_col_wrap=1,\n",
+ ").update_yaxes(matches=None)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Calculate the residual based on a single CEMS aggregation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# for now, let's pick a single cems aggregation to use to calculate a residual\n",
+ "cems_data_column = \"net_generation_mwh_bac_all\"\n",
+ "\n",
+ "combined_data[\"residual\"] = (\n",
+ " combined_data[\"net_generation_mwh_930_clean\"] - combined_data[cems_data_column]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Visualize residual for the BA"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_to_visualize = combined_data[\n",
+ " (combined_data[\"ba_code\"] == ba)\n",
+ " & (~combined_data[\"fuel_category\"].isin([\"hydro\", \"solar\", \"wind\"]))\n",
+ "]\n",
+ "\n",
+ "px.line(\n",
+ " data_to_visualize,\n",
+ " x=\"datetime_local\",\n",
+ " y=[\"net_generation_mwh_930_clean\", cems_data_column, \"residual\"],\n",
+ " facet_col=\"fuel_category\",\n",
+ " height=1000,\n",
+ " facet_col_wrap=1,\n",
+ ").update_yaxes(matches=None)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Test Scaling Strategy\n",
+ "If the residual is ever negative, we want to scale the cems net generation data to always be less than or equal to the 930 net generation. \n",
+ "\n",
+ "To do this, we'll try scaling the data as a percentage:\n",
+ "1. For each hour, calculate the ratio between 930 NG and CEMS NG.\n",
+ "2. For each BA-fuel, find the minimum ratio. If the minimum ratio is >= 1, it means that 930 is always greater than CEMS and doesn't need to be scaled. For any BA-fuels where the ratio is < 1, we will use this as a scaling factor to scale the CEMS data such that the scaled data is always <= the 930 data\n",
+ "3. Multiply all hourly CEMS values by the scaling factor"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# only keep data where the cems data is greater than zero\n",
+ "scaling_factors = combined_data.copy()[combined_data[cems_data_column] != 0]\n",
+ "\n",
+ "# calculate the ratio of 930 net generation to cems net generation\n",
+ "# if correct, ratio should be >=1\n",
+ "scaling_factors[\"scaling_factor\"] = (\n",
+ " scaling_factors[\"net_generation_mwh_930_clean\"] / scaling_factors[cems_data_column]\n",
+ ")\n",
+ "\n",
+ "# find the minimum ratio for each ba-fuel\n",
+ "scaling_factors = (\n",
+ " scaling_factors.groupby([\"ba_code\", \"fuel_category\"])[\"scaling_factor\"]\n",
+ " .min()\n",
+ " .reset_index()\n",
+ ")\n",
+ "scaling_factors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# merge the scaling factor into the combined data\n",
+ "# for any BA-fuels without a scaling factor, fill with 1 (scale to 100% of the original data)\n",
+ "combined_data = combined_data.merge(\n",
+ " scaling_factors, how=\"left\", on=[\"ba_code\", \"fuel_category\"]\n",
+ ").fillna(1)\n",
+ "\n",
+ "# calculate the scaled cems data\n",
+ "combined_data[\"cems_scaled\"] = (\n",
+ " combined_data[cems_data_column] * combined_data[\"scaling_factor\"]\n",
+ ")\n",
+ "\n",
+ "# calculate a scaled residual\n",
+ "combined_data[\"residual_scaled\"] = (\n",
+ " combined_data[\"net_generation_mwh_930_clean\"] - combined_data[\"cems_scaled\"]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Plot scaled residuals"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ba = \"PJM\"\n",
+ "\n",
+ "data_to_visualize = combined_data[\n",
+ " (combined_data[\"ba_code\"] == ba)\n",
+ " & (~combined_data[\"fuel_category\"].isin([\"hydro\", \"solar\", \"wind\"]))\n",
+ "]\n",
+ "\n",
+ "px.line(\n",
+ " data_to_visualize,\n",
+ " x=\"datetime_local\",\n",
+ " y=[\n",
+ " \"net_generation_mwh_930_clean\",\n",
+ " cems_data_column,\n",
+ " \"cems_scaled\",\n",
+ " \"residual\",\n",
+ " \"residual_scaled\",\n",
+ " ],\n",
+ " facet_col=\"fuel_category\",\n",
+ " height=1000,\n",
+ " facet_col_wrap=1,\n",
+ ").update_yaxes(matches=None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "combined_data = combined_data.reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# broken\n",
+ "\n",
+ "ba = \"MISO\"\n",
+ "fuel = \"natural_gas\"\n",
+ "\n",
+ "data_to_visualize = combined_data.copy()[\n",
+ " (combined_data[\"ba_code\"] == ba) & (combined_data[\"fuel_category\"] == fuel)\n",
+ "]\n",
+ "data_to_visualize[\"datetime_local\"] = pd.to_datetime(\n",
+ " data_to_visualize[\"datetime_local\"]\n",
+ ")\n",
+ "data_to_visualize[\"date\"] = data_to_visualize[\"datetime_local\"].dt.date\n",
+ "data_to_visualize[\"hour\"] = data_to_visualize[\"datetime_local\"].dt.hour\n",
+ "\n",
+ "# data_to_visualize = data_to_visualize.pivot(index='hour', columns='date', values='residual_scaled')\n",
+ "\n",
+ "# px.imshow(data_to_visualize, color_continuous_scale=\"RdBu\", width=1000, height=400, color_continuous_midpoint=0,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Export the profile"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_to_export = combined_data[\n",
+ " [\n",
+ " \"ba_code\",\n",
+ " \"fuel_category\",\n",
+ " \"datetime_utc\",\n",
+ " \"datetime_local\",\n",
+ " \"report_date\",\n",
+ " \"residual_scaled\",\n",
+ " ]\n",
+ "]\n",
+ "data_to_export.to_csv(\"../data/outputs/residual_profiles.csv\", index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Evaluate profile quality\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "with open(\"../data/outputs/residual_profiles.csv\") as f:\n",
+ " line = f.readline()\n",
+ " print(line.split(\",\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cems.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Produced by data_pipeline\n",
+ "eia = pd.read_csv(\"../data/outputs/eia923_for_residual.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Produced by plant_class_differences\n",
+ "# TODO use output plant data to find smallest plants after cleaning -- some of these are in \"no cems\" data categories\n",
+ "validation_plants = pd.read_csv(\"../data/outputs/validation_plants.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "to_distribute = eia[eia.plant_id_eia.isin(validation_plants.plant_id_eia)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "to_distribute.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "distributed = distribute_monthly_eia_data_to_hourly(\n",
+ " to_distribute, combined_data, \"residual_scaled\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleaned = load_chalendar_for_pipeline(\n",
+ " \"../data/eia930/chalendar/EBA_adjusted_elec.csv\", year=year\n",
+ ")\n",
+ "cems = pd.read_csv(\n",
+ " f\"../data/outputs/cems_{year}.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleaned = cleaned.rename(columns={\"datetime_utc\": \"datetime_utc\"})\n",
+ "cems = cems.rename(columns={\"datetime_utc\": \"datetime_utc\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plant_attributes = pd.read_csv(\"../data/outputs/plant_static_attributes.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleaned.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "calculate_residual(cems, cleaned, plant_attributes, 2020)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cems"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.5 ('hourly_egrid')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
},
- "nbformat": 4,
- "nbformat_minor": 2
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.5"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "4103f3cd497821eca917ea303dbe10c590d787eb7d2dc3fd4e15dec0356e7931"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
}
diff --git a/notebooks/explore_methods/national_wind_solar_correlations.ipynb b/notebooks/explore_methods/national_wind_solar_correlations.ipynb
index 1b6a7669..99b4ff37 100644
--- a/notebooks/explore_methods/national_wind_solar_correlations.ipynb
+++ b/notebooks/explore_methods/national_wind_solar_correlations.ipynb
@@ -27,7 +27,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"\n",
"import eia930\n",
@@ -47,7 +48,7 @@
"# load eia930 data\n",
"\n",
"# If running small, we didn't clean the whole year, so need to use the Chalender file to build residual profiles.\n",
- "clean_930_file = (f\"{outputs_folder()}{path_prefix}/eia930/eia930_elec.csv\")\n",
+ "clean_930_file = f\"{outputs_folder()}{path_prefix}/eia930/eia930_elec.csv\"\n",
"eia930_data = eia930.load_chalendar_for_pipeline(clean_930_file, year=year)\n",
"# until we can fix the physics reconciliation, we need to apply some post-processing steps\n",
"eia930_data = eia930.remove_imputed_ones(eia930_data)\n",
@@ -67,8 +68,8 @@
"metadata": {},
"outputs": [],
"source": [
- "fuel=\"wind\"\n",
- "report_date=\"2020-11-01\"\n",
+ "fuel = \"wind\"\n",
+ "report_date = \"2020-11-01\"\n",
"\n",
"df_temporary = eia930_data.copy()[\n",
" (eia930_data[\"fuel_category_eia930\"] == fuel)\n",
@@ -89,7 +90,9 @@
"outputs": [],
"source": [
"# how well correlated are profiles across utc time\n",
- "df_temporary.pivot(index=\"datetime_utc\", columns=\"ba_code\", values=\"net_generation_mwh_930\").corr().mean().mean()"
+ "df_temporary.pivot(\n",
+ " index=\"datetime_utc\", columns=\"ba_code\", values=\"net_generation_mwh_930\"\n",
+ ").corr().mean().mean()"
]
},
{
@@ -99,7 +102,9 @@
"outputs": [],
"source": [
"# how well correlated are profiles across local time\n",
- "df_temporary.pivot(index=\"datetime_local\", columns=\"ba_code\", values=\"net_generation_mwh_930\").corr().mean().mean()"
+ "df_temporary.pivot(\n",
+ " index=\"datetime_local\", columns=\"ba_code\", values=\"net_generation_mwh_930\"\n",
+ ").corr().mean().mean()"
]
}
],
diff --git a/notebooks/manual_data/default_fuel_sulfur_content.ipynb b/notebooks/manual_data/default_fuel_sulfur_content.ipynb
index d70ac65d..48779e24 100644
--- a/notebooks/manual_data/default_fuel_sulfur_content.ipynb
+++ b/notebooks/manual_data/default_fuel_sulfur_content.ipynb
@@ -30,20 +30,25 @@
"metadata": {},
"outputs": [],
"source": [
- "for year in [2015,2016,2017,2018,2019,2020]:\n",
+ "for year in [2015, 2016, 2017, 2018, 2019, 2020]:\n",
+ " pudl_out = load_data.initialize_pudl_out(year)\n",
"\n",
- " pudl_out = load_data.initialize_pudl_out(year)\n",
+ " (\n",
+ " plant_specific_fuel_sulfur_content,\n",
+ " national_avg_fuel_sulfur_content,\n",
+ " annual_avg_fuel_sulfur_content,\n",
+ " ) = emissions.return_monthly_plant_fuel_sulfur_content(pudl_out)\n",
"\n",
- " (plant_specific_fuel_sulfur_content,\n",
- " national_avg_fuel_sulfur_content,\n",
- " annual_avg_fuel_sulfur_content) = emissions.return_monthly_plant_fuel_sulfur_content(pudl_out)\n",
+ " annual_avg_fuel_sulfur_content = annual_avg_fuel_sulfur_content.rename(\n",
+ " columns={\"sulfur_content_pct\": f\"sulfur_content_pct_{year}\"}\n",
+ " )\n",
"\n",
- " annual_avg_fuel_sulfur_content = annual_avg_fuel_sulfur_content.rename(columns={\"sulfur_content_pct\":f\"sulfur_content_pct_{year}\"})\n",
- "\n",
- " if year == 2015:\n",
- " result = annual_avg_fuel_sulfur_content.copy()\n",
- " else:\n",
- " result = result.merge(annual_avg_fuel_sulfur_content, how=\"outer\", on=\"energy_source_code\")\n",
+ " if year == 2015:\n",
+ " result = annual_avg_fuel_sulfur_content.copy()\n",
+ " else:\n",
+ " result = result.merge(\n",
+ " annual_avg_fuel_sulfur_content, how=\"outer\", on=\"energy_source_code\"\n",
+ " )\n",
"\n",
"result"
]
diff --git a/notebooks/manual_data/export_fuel_heat_content.ipynb b/notebooks/manual_data/export_fuel_heat_content.ipynb
index 62dee1bf..6a01315e 100644
--- a/notebooks/manual_data/export_fuel_heat_content.ipynb
+++ b/notebooks/manual_data/export_fuel_heat_content.ipynb
@@ -15,11 +15,12 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import load_data\n",
"from filepaths import *\n",
- "import emissions\n"
+ "import emissions"
]
},
{
@@ -57,7 +58,7 @@
"\n",
"fuel_heat_content.to_csv(\n",
" outputs_folder(\"annual_average_fuel_heat_content.csv\"), index=False\n",
- ")\n"
+ ")"
]
}
],
diff --git a/notebooks/manual_data/identify_eia930_time_lags.ipynb b/notebooks/manual_data/identify_eia930_time_lags.ipynb
index 06cfd726..04d54e5b 100644
--- a/notebooks/manual_data/identify_eia930_time_lags.ipynb
+++ b/notebooks/manual_data/identify_eia930_time_lags.ipynb
@@ -57,7 +57,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import download_data\n",
"import load_data\n",
@@ -78,8 +79,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# Data before and after shifts \n",
- "# Note: this is very slow! (~30min) because it's pivoting large files. \n",
+ "# Data before and after shifts\n",
+ "# Note: this is very slow! (~30min) because it's pivoting large files.\n",
"lraw = []\n",
"lshift = []\n",
"\n",
@@ -92,16 +93,16 @@
" s = eia930.reformat_chalendar(s)\n",
" r = eia930.reformat_chalendar(r)\n",
"\n",
- " s = s[s.fuel.isin([\"COL\",\"NG\",\"OIL\"])]\n",
+ " s = s[s.fuel.isin([\"COL\", \"NG\", \"OIL\"])]\n",
" s = s.rename(columns={\"UTC Time at End of Hour\": \"datetime_utc\"})\n",
- " s = s.groupby([\"datetime_utc\",\"BA\"]).sum()[\"generation\"].reset_index()\n",
- " s = s[s.datetime_utc.dt.year == year] # filter for year\n",
+ " s = s.groupby([\"datetime_utc\", \"BA\"]).sum()[\"generation\"].reset_index()\n",
+ " s = s[s.datetime_utc.dt.year == year] # filter for year\n",
"\n",
" # Filter for fossil fuels, sum by BA\n",
- " r = r[r.fuel.isin([\"COL\",\"NG\",\"OIL\"])]\n",
+ " r = r[r.fuel.isin([\"COL\", \"NG\", \"OIL\"])]\n",
" r = r.rename(columns={\"UTC Time at End of Hour\": \"datetime_utc\"})\n",
- " r = r.groupby([\"datetime_utc\",\"BA\"]).sum()[\"generation\"].reset_index()\n",
- " r = r[r.datetime_utc.dt.year == year] # filter for year\n",
+ " r = r.groupby([\"datetime_utc\", \"BA\"]).sum()[\"generation\"].reset_index()\n",
+ " r = r[r.datetime_utc.dt.year == year] # filter for year\n",
" lraw.append(r)\n",
" lshift.append(s)"
]
@@ -122,14 +123,18 @@
"metadata": {},
"outputs": [],
"source": [
- "# Load data after shifting and rolling filter \n",
+ "# Load data after shifting and rolling filter\n",
"\n",
"all_rolled = []\n",
"for y in [2019, 2020, 2021]:\n",
- " rolled_930 = pd.read_csv(f\"../../data/outputs/{y}/eia930/eia930_rolling.csv\", index_col=0, parse_dates=True)\n",
+ " rolled_930 = pd.read_csv(\n",
+ " f\"../../data/outputs/{y}/eia930/eia930_rolling.csv\",\n",
+ " index_col=0,\n",
+ " parse_dates=True,\n",
+ " )\n",
" rolled_930 = rolled_930[rolled_930.index.year == y]\n",
" all_rolled.append(rolled_930)\n",
- "rolled_930 = eia930.reformat_chalendar(pd.concat(all_rolled))\n"
+ "rolled_930 = eia930.reformat_chalendar(pd.concat(all_rolled))"
]
},
{
@@ -138,9 +143,14 @@
"metadata": {},
"outputs": [],
"source": [
- "##### Remove renewables before summing 930 \n",
+ "##### Remove renewables before summing 930\n",
"\n",
- "rolled_930 = rolled_930[rolled_930.fuel.isin([\"COL\",\"NG\",\"OIL\"])].groupby([\"datetime_utc\",\"BA\"]).sum().reset_index()"
+ "rolled_930 = (\n",
+ " rolled_930[rolled_930.fuel.isin([\"COL\", \"NG\", \"OIL\"])]\n",
+ " .groupby([\"datetime_utc\", \"BA\"])\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ ")"
]
},
{
@@ -152,19 +162,26 @@
"# Load files\n",
"# Aggregate by BA during loading to cut down on space\n",
"cems = pd.DataFrame()\n",
- "for y in [2019, 2020, 2021]: \n",
+ "for y in [2019, 2020, 2021]:\n",
" print(f\"loading {y}\")\n",
" file = f\"{data_folder()}/outputs/{y}/cems_cleaned_{y}.csv\"\n",
" plant_meta = pd.read_csv(f\"../../data/outputs/{y}/plant_static_attributes_{y}.csv\")\n",
- " c = pd.read_csv(file, index_col=0, parse_dates=['datetime_utc'])\n",
- " c = c.rename(columns={\"datetime_utc\":\"datetime_utc\"})\n",
- " c = c.merge(plant_meta[['plant_id_eia', 'plant_primary_fuel', 'ba_code']], how='left', left_index=True, right_on='plant_id_eia')\n",
+ " c = pd.read_csv(file, index_col=0, parse_dates=[\"datetime_utc\"])\n",
+ " c = c.rename(columns={\"datetime_utc\": \"datetime_utc\"})\n",
+ " c = c.merge(\n",
+ " plant_meta[[\"plant_id_eia\", \"plant_primary_fuel\", \"ba_code\"]],\n",
+ " how=\"left\",\n",
+ " left_index=True,\n",
+ " right_on=\"plant_id_eia\",\n",
+ " )\n",
" # exclude solar power for CEMS, since we're just going to look at COL + OIL + NG in the 930 data\n",
" c = c[c[\"plant_primary_fuel\"] != \"SUN\"]\n",
" print(\"Aggregating\")\n",
- " if y == 2021: \n",
- " c = c.rename(columns={\"gross_generation_mwh\":\"net_generation_mwh\"})\n",
- " cems_aggregated = c.groupby([\"datetime_utc\",\"ba_code\"]).sum()[\"net_generation_mwh\"].reset_index()\n",
+ " if y == 2021:\n",
+ " c = c.rename(columns={\"gross_generation_mwh\": \"net_generation_mwh\"})\n",
+ " cems_aggregated = (\n",
+ " c.groupby([\"datetime_utc\", \"ba_code\"]).sum()[\"net_generation_mwh\"].reset_index()\n",
+ " )\n",
" cems = pd.concat([cems, cems_aggregated])\n",
"\n",
"cems.head()"
@@ -176,7 +193,9 @@
"metadata": {},
"outputs": [],
"source": [
- "plant_attributes = pd.read_csv(outputs_folder(f\"{year}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())"
+ "plant_attributes = pd.read_csv(\n",
+ " outputs_folder(f\"{year}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes()\n",
+ ")"
]
},
{
@@ -202,7 +221,9 @@
"metadata": {},
"outputs": [],
"source": [
- "print(f\"shared BAs: {len(bas)} out of {len(raw.BA.unique())} 930 BAs and {len(cems.ba_code.unique())} CEMS BAs.\")\n",
+ "print(\n",
+ " f\"shared BAs: {len(bas)} out of {len(raw.BA.unique())} 930 BAs and {len(cems.ba_code.unique())} CEMS BAs.\"\n",
+ ")\n",
"\n",
"missing_cems = set(raw.BA.unique()).difference(set(cems.ba_code.unique()))\n",
"missing_930 = set(cems.ba_code.unique()).difference(set(raw.BA.unique()))\n",
@@ -217,26 +238,27 @@
"outputs": [],
"source": [
"def find_best_cor(cems, df_eia930):\n",
- " cems = cems.pivot(columns=\"ba_code\", index=\"datetime_utc\", values=\"net_generation_mwh\")\n",
+ " cems = cems.pivot(\n",
+ " columns=\"ba_code\", index=\"datetime_utc\", values=\"net_generation_mwh\"\n",
+ " )\n",
" df_eia930 = df_eia930.pivot(columns=\"BA\", index=\"datetime_utc\", values=\"generation\")\n",
"\n",
" bas = set(cems.columns).intersection(set(df_eia930.columns))\n",
"\n",
- " correlations = pd.DataFrame(index=bas, columns=range(-12,12), dtype=float)\n",
+ " correlations = pd.DataFrame(index=bas, columns=range(-12, 12), dtype=float)\n",
"\n",
" for ba in correlations.index:\n",
" for lag in correlations.columns:\n",
- " # prepare 930: select BA \n",
- " #eia = df_eia930[df_eia930.BA==ba][\"generation\"]\n",
+ " # prepare 930: select BA\n",
+ " # eia = df_eia930[df_eia930.BA==ba][\"generation\"]\n",
" # prepare CEMS: select BA\n",
- " #c = cems[cems.ba_code==ba][\"net_generation_mwh\"]\n",
- " # calculate \n",
- " correlations.loc[ba,lag] = cems[ba]\\\n",
- " .corr(df_eia930[ba].shift(lag))\n",
+ " # c = cems[cems.ba_code==ba][\"net_generation_mwh\"]\n",
+ " # calculate\n",
+ " correlations.loc[ba, lag] = cems[ba].corr(df_eia930[ba].shift(lag))\n",
"\n",
" best = correlations.apply(lambda s: s.index[s.argmax()], axis=1).rename(\"best\")\n",
"\n",
- " correlations = pd.concat([best, correlations], axis='columns')\n",
+ " correlations = pd.concat([best, correlations], axis=\"columns\")\n",
" return correlations"
]
},
@@ -246,8 +268,8 @@
"metadata": {},
"outputs": [],
"source": [
- "cems.drop_duplicates(subset=[\"datetime_utc\",\"ba_code\"], inplace=True)\n",
- "#rolled_930.drop_duplicates(subset=[\"datetime_utc\",\"BA\"], inplace=True)"
+ "cems.drop_duplicates(subset=[\"datetime_utc\", \"ba_code\"], inplace=True)\n",
+ "# rolled_930.drop_duplicates(subset=[\"datetime_utc\",\"BA\"], inplace=True)"
]
},
{
@@ -258,18 +280,43 @@
"source": [
"# Calculate best correlations for shifted (no EBA cleaning) data\n",
"\n",
- "cems_930_cors = pd.concat([find_best_cor(cems, shifted).best.rename(\"all_years\"),\\\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2019],shifted[shifted.datetime_utc.dt.year==2019]).best.rename(\"2019\"),\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2020],shifted[shifted.datetime_utc.dt.year==2020]).best.rename(\"2020\"),\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2021],shifted[shifted.datetime_utc.dt.year==2021]).best.rename(\"2021\"),\n",
- " find_best_cor(cems[(cems.datetime_utc.dt.month>=4)&(cems.datetime_utc.dt.month<=9)],\n",
- " shifted[(shifted.datetime_utc.dt.month>=4)&(shifted.datetime_utc.dt.month<=9)]).best.rename(\"daylight time\"),\n",
- " find_best_cor(cems[(cems.datetime_utc.dt.month>=11)|(cems.datetime_utc.dt.month<=2)],\n",
- " shifted[(shifted.datetime_utc.dt.month>=11)|(shifted.datetime_utc.dt.month<=2)]).best.rename(\"standard time\")],\n",
- " axis='columns')\n",
+ "cems_930_cors = pd.concat(\n",
+ " [\n",
+ " find_best_cor(cems, shifted).best.rename(\"all_years\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2019],\n",
+ " shifted[shifted.datetime_utc.dt.year == 2019],\n",
+ " ).best.rename(\"2019\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2020],\n",
+ " shifted[shifted.datetime_utc.dt.year == 2020],\n",
+ " ).best.rename(\"2020\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2021],\n",
+ " shifted[shifted.datetime_utc.dt.year == 2021],\n",
+ " ).best.rename(\"2021\"),\n",
+ " find_best_cor(\n",
+ " cems[(cems.datetime_utc.dt.month >= 4) & (cems.datetime_utc.dt.month <= 9)],\n",
+ " shifted[\n",
+ " (shifted.datetime_utc.dt.month >= 4)\n",
+ " & (shifted.datetime_utc.dt.month <= 9)\n",
+ " ],\n",
+ " ).best.rename(\"daylight time\"),\n",
+ " find_best_cor(\n",
+ " cems[\n",
+ " (cems.datetime_utc.dt.month >= 11) | (cems.datetime_utc.dt.month <= 2)\n",
+ " ],\n",
+ " shifted[\n",
+ " (shifted.datetime_utc.dt.month >= 11)\n",
+ " | (shifted.datetime_utc.dt.month <= 2)\n",
+ " ],\n",
+ " ).best.rename(\"standard time\"),\n",
+ " ],\n",
+ " axis=\"columns\",\n",
+ ")\n",
"\n",
"cems_930_cors.to_csv(\"../../data/outputs/2021/cems_SHIFTEDeia930_cor_lags.csv\")\n",
- "#cems_930_cors"
+ "# cems_930_cors"
]
},
{
@@ -280,15 +327,34 @@
"source": [
"# Calculate best correlations for raw data\n",
"\n",
- "cems_930_cors = pd.concat([find_best_cor(cems, raw).best.rename(\"all_years\"),\\\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2019],raw[raw.datetime_utc.dt.year==2019]).best.rename(\"2019\"),\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2020],raw[raw.datetime_utc.dt.year==2020]).best.rename(\"2020\"),\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2021],raw[raw.datetime_utc.dt.year==2021]).best.rename(\"2021\"),\n",
- " find_best_cor(cems[(cems.datetime_utc.dt.month>=4)&(cems.datetime_utc.dt.month<=9)],\n",
- " raw[(raw.datetime_utc.dt.month>=4)&(raw.datetime_utc.dt.month<=9)]).best.rename(\"daylight time\"),\n",
- " find_best_cor(cems[(cems.datetime_utc.dt.month>=11)|(cems.datetime_utc.dt.month<=2)],\n",
- " raw[(raw.datetime_utc.dt.month>=11)|(raw.datetime_utc.dt.month<=2)]).best.rename(\"standard time\")],\n",
- " axis='columns')\n",
+ "cems_930_cors = pd.concat(\n",
+ " [\n",
+ " find_best_cor(cems, raw).best.rename(\"all_years\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2019],\n",
+ " raw[raw.datetime_utc.dt.year == 2019],\n",
+ " ).best.rename(\"2019\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2020],\n",
+ " raw[raw.datetime_utc.dt.year == 2020],\n",
+ " ).best.rename(\"2020\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2021],\n",
+ " raw[raw.datetime_utc.dt.year == 2021],\n",
+ " ).best.rename(\"2021\"),\n",
+ " find_best_cor(\n",
+ " cems[(cems.datetime_utc.dt.month >= 4) & (cems.datetime_utc.dt.month <= 9)],\n",
+ " raw[(raw.datetime_utc.dt.month >= 4) & (raw.datetime_utc.dt.month <= 9)],\n",
+ " ).best.rename(\"daylight time\"),\n",
+ " find_best_cor(\n",
+ " cems[\n",
+ " (cems.datetime_utc.dt.month >= 11) | (cems.datetime_utc.dt.month <= 2)\n",
+ " ],\n",
+ " raw[(raw.datetime_utc.dt.month >= 11) | (raw.datetime_utc.dt.month <= 2)],\n",
+ " ).best.rename(\"standard time\"),\n",
+ " ],\n",
+ " axis=\"columns\",\n",
+ ")\n",
"\n",
"cems_930_cors.to_csv(\"../../data/outputs/2021/cems_RAWeia930_cor_lags.csv\")\n",
"cems_930_cors"
@@ -300,17 +366,42 @@
"metadata": {},
"outputs": [],
"source": [
- "## Calculate correlations using different subsets of 930 data \n",
- "\n",
- "cems_930_cors = pd.concat([find_best_cor(cems, rolled_930).best.rename(\"all_years\"),\\\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2019],rolled_930[rolled_930.datetime_utc.dt.year==2019]).best.rename(\"2019\"),\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2020],rolled_930[rolled_930.datetime_utc.dt.year==2020]).best.rename(\"2020\"),\n",
- " find_best_cor(cems[cems.datetime_utc.dt.year==2021],rolled_930[rolled_930.datetime_utc.dt.year==2021]).best.rename(\"2021\"),\n",
- " find_best_cor(cems[(cems.datetime_utc.dt.month>=4)&(cems.datetime_utc.dt.month<=9)],\n",
- " rolled_930[(rolled_930.datetime_utc.dt.month>=4)&(rolled_930.datetime_utc.dt.month<=9)]).best.rename(\"daylight time\"),\n",
- " find_best_cor(cems[(cems.datetime_utc.dt.month>=11)|(cems.datetime_utc.dt.month<=2)],\n",
- " rolled_930[(rolled_930.datetime_utc.dt.month>=11)|(rolled_930.datetime_utc.dt.month<=2)]).best.rename(\"standard time\")],\n",
- " axis='columns')\n",
+ "## Calculate correlations using different subsets of 930 data\n",
+ "\n",
+ "cems_930_cors = pd.concat(\n",
+ " [\n",
+ " find_best_cor(cems, rolled_930).best.rename(\"all_years\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2019],\n",
+ " rolled_930[rolled_930.datetime_utc.dt.year == 2019],\n",
+ " ).best.rename(\"2019\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2020],\n",
+ " rolled_930[rolled_930.datetime_utc.dt.year == 2020],\n",
+ " ).best.rename(\"2020\"),\n",
+ " find_best_cor(\n",
+ " cems[cems.datetime_utc.dt.year == 2021],\n",
+ " rolled_930[rolled_930.datetime_utc.dt.year == 2021],\n",
+ " ).best.rename(\"2021\"),\n",
+ " find_best_cor(\n",
+ " cems[(cems.datetime_utc.dt.month >= 4) & (cems.datetime_utc.dt.month <= 9)],\n",
+ " rolled_930[\n",
+ " (rolled_930.datetime_utc.dt.month >= 4)\n",
+ " & (rolled_930.datetime_utc.dt.month <= 9)\n",
+ " ],\n",
+ " ).best.rename(\"daylight time\"),\n",
+ " find_best_cor(\n",
+ " cems[\n",
+ " (cems.datetime_utc.dt.month >= 11) | (cems.datetime_utc.dt.month <= 2)\n",
+ " ],\n",
+ " rolled_930[\n",
+ " (rolled_930.datetime_utc.dt.month >= 11)\n",
+ " | (rolled_930.datetime_utc.dt.month <= 2)\n",
+ " ],\n",
+ " ).best.rename(\"standard time\"),\n",
+ " ],\n",
+ " axis=\"columns\",\n",
+ ")\n",
"\n",
"cems_930_cors.to_csv(\"../../data/outputs/2021/cems_RAWeia930_cor_lags.csv\")\n",
"cems_930_cors"
@@ -326,19 +417,27 @@
"\n",
"ba = \"SC\"\n",
"\n",
- "to_plot_930 = shifted[shifted.BA==ba].groupby(\"datetime_utc\").sum()\n",
+ "to_plot_930 = shifted[shifted.BA == ba].groupby(\"datetime_utc\").sum()\n",
"\n",
"print(f\"correlations for {ba}\")\n",
"print(cems_930_cors.loc[ba])\n",
"\n",
"fig = go.Figure()\n",
- "fig.add_trace(go.Scatter(x=cems[cems.ba_code==ba].datetime_utc, y=cems[cems.ba_code==ba].net_generation_mwh, name=\"CEMS\"))\n",
- "fig.add_trace(go.Scatter(x=to_plot_930.index, y=to_plot_930.generation, name=\"EIA 930 (after adjustment and rolling cleaning)\"))\n",
- "fig.update_layout(\n",
- " title=ba,\n",
- " xaxis_title=\"Date\",\n",
- " yaxis_title=\"Generation\"\n",
- ")"
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=cems[cems.ba_code == ba].datetime_utc,\n",
+ " y=cems[cems.ba_code == ba].net_generation_mwh,\n",
+ " name=\"CEMS\",\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=to_plot_930.index,\n",
+ " y=to_plot_930.generation,\n",
+ " name=\"EIA 930 (after adjustment and rolling cleaning)\",\n",
+ " )\n",
+ ")\n",
+ "fig.update_layout(title=ba, xaxis_title=\"Date\", yaxis_title=\"Generation\")"
]
},
{
@@ -354,11 +453,17 @@
"metadata": {},
"outputs": [],
"source": [
- "#interchange = pd.read_csv(\"../data/eia930/chalendar/EBA_rolling.csv\",index_col=0, parse_dates=True)\n",
+ "# interchange = pd.read_csv(\"../data/eia930/chalendar/EBA_rolling.csv\",index_col=0, parse_dates=True)\n",
"interchanges = []\n",
- "for year in [2019, 2020, 2021]: \n",
- " interchange = pd.read_csv(f\"../../data/outputs/{year}/eia930/eia930_raw.csv\",index_col=0, parse_dates=True)\n",
- " interchange = interchange[interchange.index.year == year] # limit to after gen was reported by fuel type\n",
+ "for year in [2019, 2020, 2021]:\n",
+ " interchange = pd.read_csv(\n",
+ " f\"../../data/outputs/{year}/eia930/eia930_raw.csv\",\n",
+ " index_col=0,\n",
+ " parse_dates=True,\n",
+ " )\n",
+ " interchange = interchange[\n",
+ " interchange.index.year == year\n",
+ " ] # limit to after gen was reported by fuel type\n",
" interchanges.append(interchange)"
]
},
@@ -377,7 +482,7 @@
"metadata": {},
"outputs": [],
"source": [
- "bas930 = {re.split(r\"[-.]\",c)[1] for c in interchange.columns}"
+ "bas930 = {re.split(r\"[-.]\", c)[1] for c in interchange.columns}"
]
},
{
@@ -388,47 +493,61 @@
"source": [
"# given a df where columns are interchange data, add best correlation between matching BAs to interchange_cors dict\n",
"# optionally, write markdown to {file}.md and csvs at {file}_{ba}.csv\n",
- "def interchange_cor(interchange, interchange_cors:dict={}, file=\"\", name:str=\"cors\"):\n",
+ "def interchange_cor(\n",
+ " interchange, interchange_cors: dict = {}, file=\"\", name: str = \"cors\"\n",
+ "):\n",
" # Delete file\n",
" if file != \"\":\n",
- " hs = open(file+\".md\",\"w\")\n",
+ " hs = open(file + \".md\", \"w\")\n",
" hs.write(\"\\n\\n\")\n",
- " hs.close() \n",
+ " hs.close()\n",
"\n",
" for ba in bas930:\n",
" print(ba, end=\"...\")\n",
- " other_cols = [c for c in interchange.columns \\\n",
- " if re.split(r\"[-.]\",c)[1]==ba \\\n",
- " and re.split(r\"[-.]\",c)[2]!=\"ALL\"]\n",
- " other_bas = [re.split(r\"[-.]\",c)[2] for c in other_cols]\n",
- " #print(f\"{ba} connects to {other_bas}\")\n",
- "\n",
- " out = pd.DataFrame(index=other_bas, columns=range(-12,12), dtype=float)\n",
+ " other_cols = [\n",
+ " c\n",
+ " for c in interchange.columns\n",
+ " if re.split(r\"[-.]\", c)[1] == ba and re.split(r\"[-.]\", c)[2] != \"ALL\"\n",
+ " ]\n",
+ " other_bas = [re.split(r\"[-.]\", c)[2] for c in other_cols]\n",
+ " # print(f\"{ba} connects to {other_bas}\")\n",
+ "\n",
+ " out = pd.DataFrame(index=other_bas, columns=range(-12, 12), dtype=float)\n",
" for o_ba in out.index:\n",
" this_way = f\"EBA.{o_ba}-{ba}.ID.H\"\n",
" other_way = f\"EBA.{ba}-{o_ba}.ID.H\"\n",
- " if other_way not in interchange.columns or this_way not in interchange.columns: \n",
+ " if (\n",
+ " other_way not in interchange.columns\n",
+ " or this_way not in interchange.columns\n",
+ " ):\n",
" continue\n",
" for lag in out.columns:\n",
- " out.loc[o_ba,lag] = abs(interchange[this_way]\\\n",
- " .corr(-1*interchange[other_way].shift(lag)))\n",
- " \n",
+ " out.loc[o_ba, lag] = abs(\n",
+ " interchange[this_way].corr(-1 * interchange[other_way].shift(lag))\n",
+ " )\n",
+ "\n",
" # where is correlation the best?\n",
- " out = pd.concat([out, out.apply(lambda s: s.index[s.argmax()], axis=1).rename(\"best\")], axis='columns')\n",
+ " out = pd.concat(\n",
+ " [out, out.apply(lambda s: s.index[s.argmax()], axis=1).rename(\"best\")],\n",
+ " axis=\"columns\",\n",
+ " )\n",
"\n",
" if file != \"\":\n",
" # add new lines for proper markdown syntax\n",
- " hs = open(file+\".md\",\"a\")\n",
+ " hs = open(file + \".md\", \"a\")\n",
" hs.write(f\"\\n\\n# {ba}\\n\\n\")\n",
- " hs.close() \n",
+ " hs.close()\n",
"\n",
- " out.to_markdown(file+\".md\",mode=\"a\")\n",
+ " out.to_markdown(file + \".md\", mode=\"a\")\n",
"\n",
- " out.to_csv(f\"{file}_{ba}\"+\".csv\")\n",
+ " out.to_csv(f\"{file}_{ba}\" + \".csv\")\n",
"\n",
- " interchange_cors[ba] = pd.concat([interchange_cors.get(ba, pd.DataFrame()), out.best.rename(name)], axis='columns')\n",
+ " interchange_cors[ba] = pd.concat(\n",
+ " [interchange_cors.get(ba, pd.DataFrame()), out.best.rename(name)],\n",
+ " axis=\"columns\",\n",
+ " )\n",
"\n",
- " return interchange_cors\n"
+ " return interchange_cors"
]
},
{
@@ -438,11 +557,25 @@
"outputs": [],
"source": [
"int_cors = interchange_cor(interchange, interchange_cors={}, name=\"all_years\")\n",
- "int_cors = interchange_cor(interchange[\"2019-01-01T00:00\":\"2019-12-30T00:00\"], int_cors, name=\"2019\")\n",
- "int_cors = interchange_cor(interchange[\"2020-01-01T00:00\":\"2020-12-30T00:00\"], int_cors, name=\"2020\")\n",
- "int_cors = interchange_cor(interchange[\"2021-01-01T00:00\":\"2021-12-30T00:00\"], int_cors, name=\"2021\")\n",
- "int_cors = interchange_cor(interchange[(interchange.index.month >= 4)&(interchange.index.month <=9)], int_cors, name=\"daylight savings\")\n",
- "int_cors = interchange_cor(interchange[(interchange.index.month >= 11)|(interchange.index.month <=2)], int_cors, name=\"standard time\")\n"
+ "int_cors = interchange_cor(\n",
+ " interchange[\"2019-01-01T00:00\":\"2019-12-30T00:00\"], int_cors, name=\"2019\"\n",
+ ")\n",
+ "int_cors = interchange_cor(\n",
+ " interchange[\"2020-01-01T00:00\":\"2020-12-30T00:00\"], int_cors, name=\"2020\"\n",
+ ")\n",
+ "int_cors = interchange_cor(\n",
+ " interchange[\"2021-01-01T00:00\":\"2021-12-30T00:00\"], int_cors, name=\"2021\"\n",
+ ")\n",
+ "int_cors = interchange_cor(\n",
+ " interchange[(interchange.index.month >= 4) & (interchange.index.month <= 9)],\n",
+ " int_cors,\n",
+ " name=\"daylight savings\",\n",
+ ")\n",
+ "int_cors = interchange_cor(\n",
+ " interchange[(interchange.index.month >= 11) | (interchange.index.month <= 2)],\n",
+ " int_cors,\n",
+ " name=\"standard time\",\n",
+ ")"
]
},
{
@@ -465,18 +598,17 @@
"# Output to md file because that's an easy way to manually scan through BAs and look for anomalies\n",
"\n",
"file = \"../../data/outputs/2021/interchange_corr_summary_adjusted.md\"\n",
- "hs = open(file,\"w\")\n",
+ "hs = open(file, \"w\")\n",
"hs.write(\"\\n\\n\")\n",
- "hs.close() \n",
- "\n",
- "for (ba,out) in int_cors.items():\n",
+ "hs.close()\n",
"\n",
+ "for ba, out in int_cors.items():\n",
" # add new lines for proper markdown syntax\n",
- " hs = open(file,\"a\")\n",
- " hs.write(f\"\\n\\n# {ba}\\n\\n\")\n",
- " hs.close() \n",
+ " hs = open(file, \"a\")\n",
+ " hs.write(f\"\\n\\n# {ba}\\n\\n\")\n",
+ " hs.close()\n",
"\n",
- " out.to_markdown(file,mode=\"a\")"
+ " out.to_markdown(file, mode=\"a\")"
]
},
{
@@ -496,7 +628,13 @@
"ba2 = \"MISO\"\n",
"\n",
"fig = px.line(interchange[f\"EBA.{ba1}-{ba2}.ID.H\"])\n",
- "fig.add_trace(go.Scatter(x=interchange.index, y=interchange[f\"EBA.{ba2}-{ba1}.ID.H\"], name=f\"EBA.{ba2}-{ba1}.ID.H\"))"
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=interchange.index,\n",
+ " y=interchange[f\"EBA.{ba2}-{ba1}.ID.H\"],\n",
+ " name=f\"EBA.{ba2}-{ba1}.ID.H\",\n",
+ " )\n",
+ ")"
]
},
{
@@ -507,26 +645,28 @@
"source": [
"ba = \"PJM\"\n",
"\n",
- "# find cols of mappings in both directions \n",
- "other_cols = [c for c in interchange.columns \\\n",
- " if re.split(r\"[-.]\",c)[1]==ba \\\n",
- " and re.split(r\"[-.]\",c)[2]!=\"ALL\"]\n",
- "other_bas = [re.split(r\"[-.]\",c)[2] for c in other_cols]\n",
+ "# find cols of mappings in both directions\n",
+ "other_cols = [\n",
+ " c\n",
+ " for c in interchange.columns\n",
+ " if re.split(r\"[-.]\", c)[1] == ba and re.split(r\"[-.]\", c)[2] != \"ALL\"\n",
+ "]\n",
+ "other_bas = [re.split(r\"[-.]\", c)[2] for c in other_cols]\n",
"\n",
"these_cols = [f\"EBA.{o_ba}-{ba}.ID.H\" for o_ba in other_bas]\n",
"\n",
"# make long version with just cols of interest, adding BA column and to/from column\n",
"toplot = pd.DataFrame()\n",
- "for i in range(len(other_bas)): \n",
+ "for i in range(len(other_bas)):\n",
" to_add = (interchange[other_cols[i]]).rename(\"interchange\").to_frame()\n",
" to_add[\"source\"] = ba\n",
" to_add[\"BA\"] = other_bas[i]\n",
"\n",
- " to_add_2 = (interchange[these_cols[i]]*(-1)).rename(\"interchange\").to_frame()\n",
+ " to_add_2 = (interchange[these_cols[i]] * (-1)).rename(\"interchange\").to_frame()\n",
" to_add_2[\"source\"] = \"other BA\"\n",
" to_add_2[\"BA\"] = other_bas[i]\n",
"\n",
- " toplot = pd.concat([toplot, to_add, to_add_2], axis='index')\n"
+ " toplot = pd.concat([toplot, to_add, to_add_2], axis=\"index\")"
]
},
{
@@ -535,14 +675,21 @@
"metadata": {},
"outputs": [],
"source": [
- "fig = px.line(toplot, x=toplot.index, y=\"interchange\", facet_col=\"BA\", facet_col_wrap=2, color=\"source\")\n",
+ "fig = px.line(\n",
+ " toplot,\n",
+ " x=toplot.index,\n",
+ " y=\"interchange\",\n",
+ " facet_col=\"BA\",\n",
+ " facet_col_wrap=2,\n",
+ " color=\"source\",\n",
+ ")\n",
"fig.update_layout(\n",
" title=f\"Interchange from {ba}\",\n",
" xaxis_title=\"Date\",\n",
" yaxis_title=\"Interchange\",\n",
- "    legend_title=\"Source for interchange data\"\n",
+ "    legend_title=\"Source for interchange data\",\n",
")\n",
- "fig.for_each_annotation(lambda a: a.update(text=\"Other \"+a.text))"
+ "fig.for_each_annotation(lambda a: a.update(text=\"Other \" + a.text))"
]
},
{
@@ -551,16 +698,24 @@
"metadata": {},
"outputs": [],
"source": [
- "first=\"PJM\"\n",
- "second=\"MISO\"\n",
- "\n",
- "fig = px.line(interchange, x=interchange.index, y=[f\"EBA.{first}-{second}.ID.H\",f\"EBA.{second}-{first}.ID.H\", f\"EBA.{first}-ALL.TI.H\"])\n",
+ "first = \"PJM\"\n",
+ "second = \"MISO\"\n",
+ "\n",
+ "fig = px.line(\n",
+ " interchange,\n",
+ " x=interchange.index,\n",
+ " y=[\n",
+ " f\"EBA.{first}-{second}.ID.H\",\n",
+ " f\"EBA.{second}-{first}.ID.H\",\n",
+ " f\"EBA.{first}-ALL.TI.H\",\n",
+ " ],\n",
+ ")\n",
"\n",
"fig.update_layout(\n",
" title=f\"{first}/{second} interchange\",\n",
" xaxis_title=\"Date\",\n",
" yaxis_title=\"Interchange\",\n",
- " legend_title=\"Series\"\n",
+ " legend_title=\"Series\",\n",
")"
]
},
@@ -573,14 +728,18 @@
"ba = \"CFE\"\n",
"\n",
"fig = go.Figure()\n",
- "fig.add_trace(go.Scatter(x=interchange.index, \n",
- " y=interchange[f\"EBA.{ba}-ALL.D.H\"]-interchange[f\"EBA.{ba}-ALL.NG.H\"]))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=interchange.index,\n",
+ " y=interchange[f\"EBA.{ba}-ALL.D.H\"] - interchange[f\"EBA.{ba}-ALL.NG.H\"],\n",
+ " )\n",
+ ")\n",
"\n",
"fig.update_layout(\n",
" title=f\"{ba} demand - generation\",\n",
" xaxis_title=\"Date\",\n",
" yaxis_title=\"Demand - generation\",\n",
- " legend_title=\"Series\"\n",
+ " legend_title=\"Series\",\n",
")"
]
},
@@ -601,31 +760,36 @@
"source": [
"# given a df where columns are interchange data, add best correlation between matching BAs to interchange_cors dict\n",
"# optionally, write markdown to {file}.md and csvs at {file}_{ba}.csv\n",
- "def interchange_sign(interchange, i_sign:dict={}, file=\"\", name:str=\"cors\"):\n",
+ "def interchange_sign(interchange, i_sign: dict = {}, file=\"\", name: str = \"cors\"):\n",
" for ba in bas930:\n",
" print(ba, end=\"...\")\n",
- " other_cols = [c for c in interchange.columns \\\n",
- " if re.split(r\"[-.]\",c)[1]==ba \\\n",
- " and re.split(r\"[-.]\",c)[2]!=\"ALL\"]\n",
- " other_bas = [re.split(r\"[-.]\",c)[2] for c in other_cols]\n",
- " #print(f\"{ba} connects to {other_bas}\")\n",
- "\n",
- " out = pd.DataFrame(index=other_bas, columns=range(-12,12), dtype=float)\n",
+ " other_cols = [\n",
+ " c\n",
+ " for c in interchange.columns\n",
+ " if re.split(r\"[-.]\", c)[1] == ba and re.split(r\"[-.]\", c)[2] != \"ALL\"\n",
+ " ]\n",
+ " other_bas = [re.split(r\"[-.]\", c)[2] for c in other_cols]\n",
+ " # print(f\"{ba} connects to {other_bas}\")\n",
+ "\n",
+ " out = pd.DataFrame(index=other_bas, columns=range(-12, 12), dtype=float)\n",
" for o_ba in out.index:\n",
" this_way = f\"EBA.{o_ba}-{ba}.ID.H\"\n",
" other_way = f\"EBA.{ba}-{o_ba}.ID.H\"\n",
- " if other_way not in interchange or this_way not in interchange: \n",
- " continue \n",
+ " if other_way not in interchange or this_way not in interchange:\n",
+ " continue\n",
" for lag in out.columns:\n",
- " out.loc[o_ba,lag] = interchange[this_way]\\\n",
- " .corr(-1*interchange[other_way].shift(lag))\n",
- " \n",
+ " out.loc[o_ba, lag] = interchange[this_way].corr(\n",
+ " -1 * interchange[other_way].shift(lag)\n",
+ " )\n",
+ "\n",
" # where is correlation the best?\n",
" out = out.apply(lambda s: s.iloc[abs(s).argmax()], axis=1)\n",
"\n",
- " i_sign[ba] = pd.concat([i_sign.get(ba, pd.DataFrame()), out.rename(name)], axis='columns')\n",
+ " i_sign[ba] = pd.concat(\n",
+ " [i_sign.get(ba, pd.DataFrame()), out.rename(name)], axis=\"columns\"\n",
+ " )\n",
"\n",
- " return i_sign\n"
+ " return i_sign"
]
},
{
@@ -635,11 +799,25 @@
"outputs": [],
"source": [
"int_sign = interchange_sign(interchange, {}, name=\"all_years\")\n",
- "int_sign = interchange_sign(interchange[\"2019-01-01T00:00\":\"2019-12-30T00:00\"], int_sign, name=\"2019\")\n",
- "int_sign = interchange_sign(interchange[\"2020-01-01T00:00\":\"2020-12-30T00:00\"], int_sign, name=\"2020\")\n",
- "int_sign = interchange_sign(interchange[\"2020-01-01T00:00\":\"2020-12-30T00:00\"], int_sign, name=\"2021\")\n",
- "int_sign = interchange_sign(interchange[(interchange.index.month >= 4)&(interchange.index.month <=9)], int_sign, name=\"daylight savings\")\n",
- "int_sign = interchange_sign(interchange[(interchange.index.month >= 11)|(interchange.index.month <=2)], int_sign, name=\"standard time\")"
+ "int_sign = interchange_sign(\n",
+ " interchange[\"2019-01-01T00:00\":\"2019-12-30T00:00\"], int_sign, name=\"2019\"\n",
+ ")\n",
+ "int_sign = interchange_sign(\n",
+ " interchange[\"2020-01-01T00:00\":\"2020-12-30T00:00\"], int_sign, name=\"2020\"\n",
+ ")\n",
+ "int_sign = interchange_sign(\n",
+ " interchange[\"2020-01-01T00:00\":\"2020-12-30T00:00\"], int_sign, name=\"2021\"\n",
+ ")\n",
+ "int_sign = interchange_sign(\n",
+ " interchange[(interchange.index.month >= 4) & (interchange.index.month <= 9)],\n",
+ " int_sign,\n",
+ " name=\"daylight savings\",\n",
+ ")\n",
+ "int_sign = interchange_sign(\n",
+ " interchange[(interchange.index.month >= 11) | (interchange.index.month <= 2)],\n",
+ " int_sign,\n",
+ " name=\"standard time\",\n",
+ ")"
]
},
{
@@ -649,18 +827,17 @@
"outputs": [],
"source": [
"file = f\"{outputs_folder('2021')}/interchange_cors_sign.md\"\n",
- "hs = open(file,\"w\")\n",
+ "hs = open(file, \"w\")\n",
"hs.write(\"\\n\\n\")\n",
- "hs.close() \n",
- "\n",
- "for (ba,out) in int_sign.items():\n",
+ "hs.close()\n",
"\n",
+ "for ba, out in int_sign.items():\n",
" # add new lines for proper markdown syntax\n",
- " hs = open(file,\"a\")\n",
- " hs.write(f\"\\n\\n# {ba}\\n\\n\")\n",
- " hs.close() \n",
+ " hs = open(file, \"a\")\n",
+ " hs.write(f\"\\n\\n# {ba}\\n\\n\")\n",
+ " hs.close()\n",
"\n",
- " out.to_markdown(file,mode=\"a\")"
+ " out.to_markdown(file, mode=\"a\")"
]
}
],
diff --git a/notebooks/manual_data/manually_identify_crosswalk_updates.ipynb b/notebooks/manual_data/manually_identify_crosswalk_updates.ipynb
index 51bd809c..8e4de0c9 100644
--- a/notebooks/manual_data/manually_identify_crosswalk_updates.ipynb
+++ b/notebooks/manual_data/manually_identify_crosswalk_updates.ipynb
@@ -16,7 +16,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "import sqlalchemy as sa \n",
+ "import sqlalchemy as sa\n",
"import pudl.output"
]
},
@@ -26,7 +26,7 @@
"metadata": {},
"outputs": [],
"source": [
- "year =2020"
+ "year = 2020"
]
},
{
@@ -38,11 +38,11 @@
"# load raw cems data\n",
"cems_path = f\"../data/downloads/pudl/pudl_data/parquet/epacems/year={year}\"\n",
"cems = pd.read_parquet(cems_path).rename(\n",
- " columns={\n",
- " \"plant_id_eia\": \"plant_id_epa\",\n",
- " \"heat_content_mmbtu\": \"fuel_consumed_mmbtu\",\n",
- " }\n",
- " )\n",
+ " columns={\n",
+ " \"plant_id_eia\": \"plant_id_epa\",\n",
+ " \"heat_content_mmbtu\": \"fuel_consumed_mmbtu\",\n",
+ " }\n",
+ ")\n",
"\n",
"# load crosswalk data\n",
"crosswalk = pudl.output.epacems.epa_crosswalk()\n",
@@ -51,8 +51,8 @@
"pudl_db = \"sqlite:///../data/downloads/pudl/pudl_data/sqlite/pudl.sqlite\"\n",
"pudl_engine = sa.create_engine(pudl_db)\n",
"pudl_out = pudl.output.pudltabl.PudlTabl(\n",
- " pudl_engine, freq=\"MS\", start_date=f\"{year}-01-01\", end_date=f\"{year}-12-31\"\n",
- " )\n",
+ " pudl_engine, freq=\"MS\", start_date=f\"{year}-01-01\", end_date=f\"{year}-12-31\"\n",
+ ")\n",
"gens_860 = pudl_out.gens_eia860()"
]
},
@@ -83,7 +83,7 @@
" pd.read_csv(\n",
" \"../data/manual/egrid_static_tables/table_4-2_plants_not_connected_to_grid.csv\"\n",
" )[\"Plant ID\"]\n",
- ")\n"
+ ")"
]
},
{
@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ids[ids['plant_id_epa'].isin(ngc_plants)].to_clipboard()"
+ "ids[ids[\"plant_id_epa\"].isin(ngc_plants)].to_clipboard()"
]
},
{
@@ -108,7 +108,9 @@
"metadata": {},
"outputs": [],
"source": [
- "missing_eia = crosswalk[~crosswalk['CAMD_PLANT_ID'].isna() & crosswalk['EIA_PLANT_ID'].isna()]\n",
+ "missing_eia = crosswalk[\n",
+ " ~crosswalk[\"CAMD_PLANT_ID\"].isna() & crosswalk[\"EIA_PLANT_ID\"].isna()\n",
+ "]\n",
"missing_eia"
]
},
@@ -118,22 +120,47 @@
"metadata": {},
"outputs": [],
"source": [
- "missing_ids = missing_eia[['CAMD_PLANT_ID','CAMD_UNIT_ID','CAMD_GENERATOR_ID']].drop_duplicates()\n",
+ "missing_ids = missing_eia[\n",
+ " [\"CAMD_PLANT_ID\", \"CAMD_UNIT_ID\", \"CAMD_GENERATOR_ID\"]\n",
+ "].drop_duplicates()\n",
"\n",
- "missing_ids = missing_ids.merge(gens_860[['plant_id_eia','generator_id']], how='left', left_on=['CAMD_PLANT_ID','CAMD_UNIT_ID'], right_on=['plant_id_eia','generator_id'])\n",
- "missing_ids = missing_ids.merge(gens_860[['plant_id_eia','generator_id']], how='left', left_on=['CAMD_PLANT_ID','CAMD_GENERATOR_ID'], right_on=['plant_id_eia','generator_id'], suffixes=('_u','_g'))\n",
+ "missing_ids = missing_ids.merge(\n",
+ " gens_860[[\"plant_id_eia\", \"generator_id\"]],\n",
+ " how=\"left\",\n",
+ " left_on=[\"CAMD_PLANT_ID\", \"CAMD_UNIT_ID\"],\n",
+ " right_on=[\"plant_id_eia\", \"generator_id\"],\n",
+ ")\n",
+ "missing_ids = missing_ids.merge(\n",
+ " gens_860[[\"plant_id_eia\", \"generator_id\"]],\n",
+ " how=\"left\",\n",
+ " left_on=[\"CAMD_PLANT_ID\", \"CAMD_GENERATOR_ID\"],\n",
+ " right_on=[\"plant_id_eia\", \"generator_id\"],\n",
+ " suffixes=(\"_u\", \"_g\"),\n",
+ ")\n",
"\n",
"# identify the source of the data\n",
- "missing_ids['source'] = ''\n",
- "missing_ids.loc[~missing_ids['plant_id_eia_u'].isna(), 'source'] = \"CAMD_UNIT_ID matches EIA_GENERATOR_ID\"\n",
- "missing_ids.loc[~missing_ids['plant_id_eia_g'].isna(), 'source'] = \"CAMD_GENERATOR_ID matches EIA_GENERATOR_ID\"\n",
+ "missing_ids[\"source\"] = \"\"\n",
+ "missing_ids.loc[\n",
+ " ~missing_ids[\"plant_id_eia_u\"].isna(), \"source\"\n",
+ "] = \"CAMD_UNIT_ID matches EIA_GENERATOR_ID\"\n",
+ "missing_ids.loc[\n",
+ " ~missing_ids[\"plant_id_eia_g\"].isna(), \"source\"\n",
+ "] = \"CAMD_GENERATOR_ID matches EIA_GENERATOR_ID\"\n",
"\n",
"# fill nas in each group so that we can identify where values match\n",
- "missing_ids['generator_id_u'] = missing_ids['generator_id_u'].fillna(missing_ids['generator_id_g'])\n",
- "missing_ids['generator_id_g'] = missing_ids['generator_id_g'].fillna(missing_ids['generator_id_u'])\n",
+ "missing_ids[\"generator_id_u\"] = missing_ids[\"generator_id_u\"].fillna(\n",
+ " missing_ids[\"generator_id_g\"]\n",
+ ")\n",
+ "missing_ids[\"generator_id_g\"] = missing_ids[\"generator_id_g\"].fillna(\n",
+ " missing_ids[\"generator_id_u\"]\n",
+ ")\n",
"\n",
- "missing_ids['plant_id_eia_u'] = missing_ids['plant_id_eia_u'].fillna(missing_ids['plant_id_eia_g'])\n",
- "missing_ids['plant_id_eia_g'] = missing_ids['plant_id_eia_g'].fillna(missing_ids['plant_id_eia_u'])\n"
+ "missing_ids[\"plant_id_eia_u\"] = missing_ids[\"plant_id_eia_u\"].fillna(\n",
+ " missing_ids[\"plant_id_eia_g\"]\n",
+ ")\n",
+ "missing_ids[\"plant_id_eia_g\"] = missing_ids[\"plant_id_eia_g\"].fillna(\n",
+ " missing_ids[\"plant_id_eia_u\"]\n",
+ ")"
]
},
{
@@ -143,7 +170,13 @@
"outputs": [],
"source": [
"# identify where we identified a consistent generator match\n",
- "unit_manual_match = (missing_ids[missing_ids['generator_id_u'] == missing_ids['generator_id_g']]).drop(columns=['plant_id_eia_g','generator_id_g']).rename(columns={'plant_id_eia_u':'plant_id_eia','generator_id_u':'generator_id'})\n",
+ "unit_manual_match = (\n",
+ " (missing_ids[missing_ids[\"generator_id_u\"] == missing_ids[\"generator_id_g\"]])\n",
+ " .drop(columns=[\"plant_id_eia_g\", \"generator_id_g\"])\n",
+ " .rename(\n",
+ " columns={\"plant_id_eia_u\": \"plant_id_eia\", \"generator_id_u\": \"generator_id\"}\n",
+ " )\n",
+ ")\n",
"unit_manual_match"
]
},
@@ -153,7 +186,7 @@
"metadata": {},
"outputs": [],
"source": [
- "unit_manual_match.to_csv('../data/outputs/crosswalk_unit_manual_matches.csv')"
+ "unit_manual_match.to_csv(\"../data/outputs/crosswalk_unit_manual_matches.csv\")"
]
},
{
@@ -163,7 +196,9 @@
"outputs": [],
"source": [
"# identify where the two matching methods returned different generator matches\n",
- "multi_match = missing_ids[missing_ids['generator_id_u'] != missing_ids['generator_id_g']]\n",
+ "multi_match = missing_ids[\n",
+ " missing_ids[\"generator_id_u\"] != missing_ids[\"generator_id_g\"]\n",
+ "]\n",
"multi_match"
]
},
@@ -173,7 +208,7 @@
"metadata": {},
"outputs": [],
"source": [
- "multi_match.to_csv('../data/outputs/crosswalk_unit_manual_matches_multi.csv')"
+ "multi_match.to_csv(\"../data/outputs/crosswalk_unit_manual_matches_multi.csv\")"
]
},
{
@@ -182,7 +217,7 @@
"metadata": {},
"outputs": [],
"source": [
- "gens_860[gens_860['plant_id_eia'] == 3443]"
+ "gens_860[gens_860[\"plant_id_eia\"] == 3443]"
]
},
{
@@ -198,8 +233,13 @@
"metadata": {},
"outputs": [],
"source": [
- "missing_from_cw = ids.merge(crosswalk[['CAMD_PLANT_ID','CAMD_UNIT_ID']], how='left', left_on=['plant_id_epa','emissions_unit_id_epa'], right_on=['CAMD_PLANT_ID','CAMD_UNIT_ID'])\n",
- "missing_from_cw = missing_from_cw[missing_from_cw['CAMD_UNIT_ID'].isna()]"
+ "missing_from_cw = ids.merge(\n",
+ " crosswalk[[\"CAMD_PLANT_ID\", \"CAMD_UNIT_ID\"]],\n",
+ " how=\"left\",\n",
+ " left_on=[\"plant_id_epa\", \"emissions_unit_id_epa\"],\n",
+ " right_on=[\"CAMD_PLANT_ID\", \"CAMD_UNIT_ID\"],\n",
+ ")\n",
+ "missing_from_cw = missing_from_cw[missing_from_cw[\"CAMD_UNIT_ID\"].isna()]"
]
},
{
@@ -208,10 +248,17 @@
"metadata": {},
"outputs": [],
"source": [
- "missing_from_cw = missing_from_cw.merge(gens_860[['plant_id_eia','generator_id']], how='left', left_on=['plant_id_epa','emissions_unit_id_epa'], right_on=['plant_id_eia','generator_id'])\n",
+ "missing_from_cw = missing_from_cw.merge(\n",
+ " gens_860[[\"plant_id_eia\", \"generator_id\"]],\n",
+ " how=\"left\",\n",
+ " left_on=[\"plant_id_epa\", \"emissions_unit_id_epa\"],\n",
+ " right_on=[\"plant_id_eia\", \"generator_id\"],\n",
+ ")\n",
"\n",
- "missing_from_cw['source'] = ''\n",
- "missing_from_cw.loc[~missing_from_cw['generator_id'].isna(), 'source'] = \"CAMD_UNIT_ID matches EIA_GENERATOR_ID\"\n",
+ "missing_from_cw[\"source\"] = \"\"\n",
+ "missing_from_cw.loc[\n",
+ " ~missing_from_cw[\"generator_id\"].isna(), \"source\"\n",
+ "] = \"CAMD_UNIT_ID matches EIA_GENERATOR_ID\"\n",
"\n",
"missing_from_cw"
]
@@ -222,7 +269,7 @@
"metadata": {},
"outputs": [],
"source": [
- "missing_from_cw.to_csv('../data/outputs/missing_from_crosswalk.csv', index=False)"
+ "missing_from_cw.to_csv(\"../data/outputs/missing_from_crosswalk.csv\", index=False)"
]
},
{
@@ -231,7 +278,10 @@
"metadata": {},
"outputs": [],
"source": [
- "gens_860.loc[gens_860['plant_id_eia'] == 55641, ['plant_id_eia','generator_id','prime_mover_code']]"
+ "gens_860.loc[\n",
+ " gens_860[\"plant_id_eia\"] == 55641,\n",
+ " [\"plant_id_eia\", \"generator_id\", \"prime_mover_code\"],\n",
+ "]"
]
}
],
diff --git a/notebooks/manual_data/manually_update_OTH_fuel_code.ipynb b/notebooks/manual_data/manually_update_OTH_fuel_code.ipynb
index 0472d9e2..af7ec875 100644
--- a/notebooks/manual_data/manually_update_OTH_fuel_code.ipynb
+++ b/notebooks/manual_data/manually_update_OTH_fuel_code.ipynb
@@ -15,7 +15,8 @@
"import plotly.express as px\n",
"\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import load_data\n",
"import data_cleaning"
@@ -28,7 +29,7 @@
"outputs": [],
"source": [
"year = 2021\n",
- "pudl_out = load_data.initialize_pudl_out(year=year)\n"
+ "pudl_out = load_data.initialize_pudl_out(year=year)"
]
},
{
@@ -49,8 +50,8 @@
" y=\"fuel_mmbtu_per_unit\",\n",
" title=f\"Range of heat contents for each fuel reported in {year}\",\n",
" width=1000,\n",
- " height=600\n",
- ")\n"
+ " height=600,\n",
+ ")"
]
},
{
@@ -59,7 +60,9 @@
"metadata": {},
"outputs": [],
"source": [
- "plants_with_oth = fuel_heat_content[fuel_heat_content[\"energy_source_code\"] == \"OTH\"].copy()\n",
+ "plants_with_oth = fuel_heat_content[\n",
+ " fuel_heat_content[\"energy_source_code\"] == \"OTH\"\n",
+ "].copy()\n",
"plants_with_oth = plants_with_oth.groupby(\"plant_id_eia\").mean()\n",
"plants_with_oth"
]
@@ -70,7 +73,7 @@
"metadata": {},
"outputs": [],
"source": [
- "fuel_heat_content[fuel_heat_content[\"plant_id_eia\"] == 902]\n"
+ "fuel_heat_content[fuel_heat_content[\"plant_id_eia\"] == 902]"
]
},
{
@@ -81,7 +84,7 @@
"source": [
"# load EPA fuel type data\n",
"epa_fuel_types = data_cleaning.get_epa_unit_fuel_types(year)\n",
- "epa_fuel_types[epa_fuel_types[\"energy_source_code\"] == \"OTH\"]\n"
+ "epa_fuel_types[epa_fuel_types[\"energy_source_code\"] == \"OTH\"]"
]
},
{
@@ -92,7 +95,7 @@
"source": [
"# Load EIA-860 to examine whether a plant is retired\n",
"gens_860 = pudl_out.gens_eia860()\n",
- "gens_860[gens_860[\"plant_id_eia\"] == 60670]\n"
+ "gens_860[gens_860[\"plant_id_eia\"] == 60670]"
]
}
],
diff --git a/notebooks/manual_data/manually_update_ba_reference.ipynb b/notebooks/manual_data/manually_update_ba_reference.ipynb
index b63ceb80..ab7f108f 100644
--- a/notebooks/manual_data/manually_update_ba_reference.ipynb
+++ b/notebooks/manual_data/manually_update_ba_reference.ipynb
@@ -22,7 +22,7 @@
"import os\n",
"import requests\n",
"import pandas as pd\n",
- "import numpy as np\n"
+ "import numpy as np"
]
},
{
@@ -36,7 +36,12 @@
"\n",
"# merge the ferc data into the manual table\n",
"ba_reference_updated = ba_reference.merge(\n",
- " ferc_bas, how=\"outer\", on=\"ba_code\", indicator=\"source\", suffixes=(None, \"_ferc\"), validate=\"1:1\"\n",
+ " ferc_bas,\n",
+ " how=\"outer\",\n",
+ " on=\"ba_code\",\n",
+ " indicator=\"source\",\n",
+ " suffixes=(None, \"_ferc\"),\n",
+ " validate=\"1:1\",\n",
")\n",
"\n",
"# fill any missing data in the manual table with the data from ferc\n",
@@ -64,7 +69,7 @@
"ba_reference_updated = ba_reference_updated.drop(columns=[\"us_ba_ferc\"])\n",
"\n",
"\n",
- "ba_reference_updated\n"
+ "ba_reference_updated"
]
},
{
@@ -73,7 +78,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ba_reference_updated.to_csv(\"../../data/manual/ba_reference_updated.csv\", index=False)\n"
+ "ba_reference_updated.to_csv(\"../../data/manual/ba_reference_updated.csv\", index=False)"
]
}
],
diff --git a/notebooks/manual_data/update_utility_name_ba_map.ipynb b/notebooks/manual_data/update_utility_name_ba_map.ipynb
index ffddb998..ccc4a527 100644
--- a/notebooks/manual_data/update_utility_name_ba_map.ipynb
+++ b/notebooks/manual_data/update_utility_name_ba_map.ipynb
@@ -17,7 +17,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import load_data\n",
"from column_checks import get_dtypes\n",
@@ -85,7 +86,11 @@
"outputs": [],
"source": [
"# what are all the utility names not mapped to a BA?\n",
- "list(plant_ba.loc[plant_ba[\"balancing_authority_code_eia\"].isna(), \"utility_name_eia\"].unique())"
+ "list(\n",
+ " plant_ba.loc[\n",
+ " plant_ba[\"balancing_authority_code_eia\"].isna(), \"utility_name_eia\"\n",
+ " ].unique()\n",
+ ")"
]
},
{
@@ -95,7 +100,12 @@
"outputs": [],
"source": [
"# what are all the utility names not mapped to a BA?\n",
- "list(plant_ba.loc[plant_ba[\"balancing_authority_code_eia\"].isna(), \"transmission_distribution_owner_name\"].unique())"
+ "list(\n",
+ " plant_ba.loc[\n",
+ " plant_ba[\"balancing_authority_code_eia\"].isna(),\n",
+ " \"transmission_distribution_owner_name\",\n",
+ " ].unique()\n",
+ ")"
]
},
{
@@ -104,7 +114,10 @@
"metadata": {},
"outputs": [],
"source": [
- "plant_ba[plant_ba[\"balancing_authority_code_eia\"].isna() & (plant_ba[\"utility_name_eia\"] == \"Pacific Gas & Electric Co\")]"
+ "plant_ba[\n",
+ " plant_ba[\"balancing_authority_code_eia\"].isna()\n",
+ " & (plant_ba[\"utility_name_eia\"] == \"Pacific Gas & Electric Co\")\n",
+ "]"
]
}
],
diff --git a/notebooks/manual_data/zip_data.ipynb b/notebooks/manual_data/zip_data.ipynb
index d67b546b..1660e436 100644
--- a/notebooks/manual_data/zip_data.ipynb
+++ b/notebooks/manual_data/zip_data.ipynb
@@ -18,9 +18,10 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
"\n",
- "import output_data\n"
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
+ "\n",
+ "import output_data"
]
},
{
@@ -29,7 +30,7 @@
"metadata": {},
"outputs": [],
"source": [
- "years = [2019,2020,2021]\n",
+ "years = [2019, 2020, 2021]\n",
"\n",
"output_data.prepare_files_for_upload(years)"
]
@@ -40,7 +41,7 @@
"metadata": {},
"outputs": [],
"source": [
- "for year in [2019,2020,2021]:\n",
+ "for year in [2019, 2020, 2021]:\n",
" output_data.zip_results_for_s3(year)"
]
},
@@ -50,7 +51,7 @@
"metadata": {},
"outputs": [],
"source": [
- "for year in [2019,2020,2021]:\n",
+ "for year in [2019, 2020, 2021]:\n",
" output_data.zip_data_for_zenodo(year)"
]
}
diff --git a/notebooks/validation/data_validation.ipynb b/notebooks/validation/data_validation.ipynb
index 557a1543..60cecf2e 100644
--- a/notebooks/validation/data_validation.ipynb
+++ b/notebooks/validation/data_validation.ipynb
@@ -22,9 +22,10 @@
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
- "# Tell python where to look for modules. \n",
+ "# Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"# import local modules\n",
"import load_data\n",
@@ -49,8 +50,13 @@
"outputs": [],
"source": [
"year = 2020\n",
- "cems = pd.read_csv(f'../data/outputs/cems_subplant_{year}.csv', parse_dates=['datetime_utc','report_date'])\n",
- "eia923_allocated = pd.read_csv(f'../data/outputs/eia923_allocated_{year}.csv', parse_dates=['report_date'])"
+ "cems = pd.read_csv(\n",
+ " f\"../data/outputs/cems_subplant_{year}.csv\",\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "eia923_allocated = pd.read_csv(\n",
+ " f\"../data/outputs/eia923_allocated_{year}.csv\", parse_dates=[\"report_date\"]\n",
+ ")"
]
},
{
@@ -61,7 +67,12 @@
"source": [
"# what percent of emissions is reported in CEMS vs EIA\n",
"# NOTE: This does not include emissions only reported by CEMS, so the % may be higher\n",
- "(eia923_allocated.groupby('hourly_data_source')['co2_mass_lb_adjusted'].sum() / eia923_allocated.groupby('hourly_data_source')['co2_mass_lb_adjusted'].sum().sum(axis=0)).round(3)"
+ "(\n",
+ " eia923_allocated.groupby(\"hourly_data_source\")[\"co2_mass_lb_adjusted\"].sum()\n",
+ " / eia923_allocated.groupby(\"hourly_data_source\")[\"co2_mass_lb_adjusted\"]\n",
+ " .sum()\n",
+ " .sum(axis=0)\n",
+ ").round(3)"
]
},
{
@@ -84,10 +95,21 @@
"source": [
"# perform checks on allocated data\n",
"# fuel consumption and co2 emissions should be positive\n",
- "negative_test = validation.test_for_negative_values(eia923_allocated, ['fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_for_electricity','co2_mass_lb_adjusted'])\n",
+ "negative_test = validation.test_for_negative_values(\n",
+ " eia923_allocated,\n",
+ " [\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# if net generation is positive, fuel consumption should be non zero\n",
- "missing_fuel_test = validation.test_for_missing_fuel(eia923_allocated, 'net_generation_mwh')\n",
+ "missing_fuel_test = validation.test_for_missing_fuel(\n",
+ " eia923_allocated, \"net_generation_mwh\"\n",
+ ")\n",
"\n",
"# fuel consumed for electricity should be less than fuel consumed\n",
"chp_allocation_test = validation.test_chp_allocation(eia923_allocated)\n",
@@ -96,16 +118,38 @@
"missing_co2_test = validation.test_for_missing_co2(eia923_allocated)\n",
"\n",
"# check for generators with no data\n",
- "missing_data_test = validation.test_for_missing_data(eia923_allocated, ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_for_electricity','co2_mass_lb_adjusted'])\n",
+ "missing_data_test = validation.test_for_missing_data(\n",
+ " eia923_allocated,\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# check for generators with all data = 0\n",
- "zero_data_test = validation.test_for_zero_data(eia923_allocated, ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_for_electricity','co2_mass_lb_adjusted'])\n",
+ "zero_data_test = validation.test_for_zero_data(\n",
+ " eia923_allocated,\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# check for missing energy source code\n",
"missing_esc_test = validation.test_for_missing_energy_source_code(eia923_allocated)\n",
"\n",
"# check for missing and incorrect prime movers\n",
- "incorrect_pm_test, missing_pm_test = validation.test_for_missing_incorrect_prime_movers(eia923_allocated, year)\n",
+ "incorrect_pm_test, missing_pm_test = validation.test_for_missing_incorrect_prime_movers(\n",
+ " eia923_allocated, year\n",
+ ")\n",
"\n",
"# check for missing subplant ids\n",
"eia_missing_subplant_test = validation.test_for_missing_subplant_id(eia923_allocated)\n",
@@ -120,7 +164,7 @@
"metadata": {},
"outputs": [],
"source": [
- "heat_rate_test.sort_values(by='heat_rate')"
+ "heat_rate_test.sort_values(by=\"heat_rate\")"
]
},
{
@@ -137,10 +181,19 @@
"outputs": [],
"source": [
"# fuel consumption and co2 emissions should be positive\n",
- "cems_negative_test = validation.test_for_negative_values(cems, ['fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_adjusted', 'gross_generation_mwh'])\n",
+ "cems_negative_test = validation.test_for_negative_values(\n",
+ " cems,\n",
+ " [\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"gross_generation_mwh\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# if net generation is positive, fuel consumption should be non zero\n",
- "cems_missing_fuel_test = validation.test_for_missing_fuel(cems,'gross_generation_mwh')\n",
+ "cems_missing_fuel_test = validation.test_for_missing_fuel(cems, \"gross_generation_mwh\")\n",
"\n",
"# fuel consumed for electricity should be less than fuel consumed\n",
"cems_chp_allocation_test = validation.test_chp_allocation(cems)\n",
@@ -155,7 +208,7 @@
"cems_missing_subplant_test = validation.test_for_missing_subplant_id(cems)\n",
"\n",
"# test to see if there are any net generation values greater than gross generation\n",
- "gtn_test = validation.test_gtn_results(cems)\n"
+ "gtn_test = validation.test_gtn_results(cems)"
]
},
{
@@ -164,7 +217,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_missing_subplant_test[['plant_id_eia','emissions_unit_id_epa']].drop_duplicates()"
+ "cems_missing_subplant_test[[\"plant_id_eia\", \"emissions_unit_id_epa\"]].drop_duplicates()"
]
},
{
@@ -173,7 +226,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_missing_esc_test[['plant_id_eia','emissions_unit_id_epa']].drop_duplicates()"
+ "cems_missing_esc_test[[\"plant_id_eia\", \"emissions_unit_id_epa\"]].drop_duplicates()"
]
},
{
@@ -197,12 +250,20 @@
"outputs": [],
"source": [
"year = 2020\n",
- "cems = pd.read_csv(f'../data/outputs/{year}/cems_{year}.csv', dtype=get_dtypes())\n",
- "partial_cems_scaled = pd.read_csv(f'../data/outputs/{year}/partial_cems_scaled_{year}.csv', dtype=get_dtypes())\n",
- "eia923_allocated = pd.read_csv(f'../data/outputs/{year}/eia923_allocated_{year}.csv', dtype=get_dtypes())\n",
+ "cems = pd.read_csv(f\"../data/outputs/{year}/cems_{year}.csv\", dtype=get_dtypes())\n",
+ "partial_cems_scaled = pd.read_csv(\n",
+ " f\"../data/outputs/{year}/partial_cems_scaled_{year}.csv\", dtype=get_dtypes()\n",
+ ")\n",
+ "eia923_allocated = pd.read_csv(\n",
+ " f\"../data/outputs/{year}/eia923_allocated_{year}.csv\", dtype=get_dtypes()\n",
+ ")\n",
"\n",
- "plant_attributes = pd.read_csv(f\"../data/outputs/{year}/plant_static_attributes_{year}.csv\")\n",
- "eia923_allocated = eia923_allocated.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")\n",
+ "plant_attributes = pd.read_csv(\n",
+ " f\"../data/outputs/{year}/plant_static_attributes_{year}.csv\"\n",
+ ")\n",
+ "eia923_allocated = eia923_allocated.merge(\n",
+ " plant_attributes, how=\"left\", on=\"plant_id_eia\"\n",
+ ")\n",
"cems = cems.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")"
]
},
@@ -212,7 +273,9 @@
"metadata": {},
"outputs": [],
"source": [
- "partial_cems_scaled = partial_cems_scaled.merge(plant_attributes, how=\"left\", on=\"plant_id_eia\")"
+ "partial_cems_scaled = partial_cems_scaled.merge(\n",
+ " plant_attributes, how=\"left\", on=\"plant_id_eia\"\n",
+ ")"
]
},
{
@@ -223,9 +286,14 @@
"source": [
"ba = \"CISO\"\n",
"fuel = \"natural_gas\"\n",
- "test_eia = eia923_allocated[(eia923_allocated[\"ba_code\"] == ba) & (eia923_allocated[\"fuel_category\"] == fuel)]\n",
+ "test_eia = eia923_allocated[\n",
+ " (eia923_allocated[\"ba_code\"] == ba) & (eia923_allocated[\"fuel_category\"] == fuel)\n",
+ "]\n",
"test_cems = cems[(cems[\"ba_code\"] == ba) & (cems[\"fuel_category\"] == fuel)]\n",
- "test_pc = partial_cems_scaled[(partial_cems_scaled[\"ba_code\"] == ba) & (partial_cems_scaled[\"fuel_category\"] == fuel)]"
+ "test_pc = partial_cems_scaled[\n",
+ " (partial_cems_scaled[\"ba_code\"] == ba)\n",
+ " & (partial_cems_scaled[\"fuel_category\"] == fuel)\n",
+ "]"
]
},
{
@@ -234,7 +302,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_eia.groupby('hourly_data_source').sum()['net_generation_mwh']"
+ "test_eia.groupby(\"hourly_data_source\").sum()[\"net_generation_mwh\"]"
]
},
{
@@ -243,7 +311,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_cems[[\"gross_generation_mwh\",'net_generation_mwh']].sum()"
+ "test_cems[[\"gross_generation_mwh\", \"net_generation_mwh\"]].sum()"
]
},
{
@@ -252,7 +320,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_pc[['net_generation_mwh']].sum()"
+ "test_pc[[\"net_generation_mwh\"]].sum()"
]
},
{
@@ -261,7 +329,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_pc = test_pc.drop(columns='source')"
+ "test_pc = test_pc.drop(columns=\"source\")"
]
},
{
@@ -270,7 +338,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_cems = test_cems.drop(columns='source')"
+ "test_cems = test_cems.drop(columns=\"source\")"
]
},
{
@@ -300,7 +368,7 @@
"metadata": {},
"outputs": [],
"source": [
- "filtered_cems[[\"gross_generation_mwh\",'net_generation_mwh']].sum()"
+ "filtered_cems[[\"gross_generation_mwh\", \"net_generation_mwh\"]].sum()"
]
},
{
@@ -309,9 +377,16 @@
"metadata": {},
"outputs": [],
"source": [
- "subplants_ided_as_cems = test_eia.loc[test_eia[\"hourly_data_source\"] == 'cems', [\"plant_id_eia\",\"subplant_id\"]].drop_duplicates()\n",
- "subplants_in_cems = filtered_cems[[\"plant_id_eia\",\"subplant_id\"]].drop_duplicates()\n",
- "cems_overlap = subplants_ided_as_cems.merge(subplants_in_cems, how=\"outer\", on=[\"plant_id_eia\",\"subplant_id\"], indicator=\"source\")\n",
+ "subplants_ided_as_cems = test_eia.loc[\n",
+ " test_eia[\"hourly_data_source\"] == \"cems\", [\"plant_id_eia\", \"subplant_id\"]\n",
+ "].drop_duplicates()\n",
+ "subplants_in_cems = filtered_cems[[\"plant_id_eia\", \"subplant_id\"]].drop_duplicates()\n",
+ "cems_overlap = subplants_ided_as_cems.merge(\n",
+ " subplants_in_cems,\n",
+ " how=\"outer\",\n",
+ " on=[\"plant_id_eia\", \"subplant_id\"],\n",
+ " indicator=\"source\",\n",
+ ")\n",
"cems_overlap"
]
},
@@ -321,9 +396,13 @@
"metadata": {},
"outputs": [],
"source": [
- "subplants_ided_as_pc = test_eia.loc[test_eia[\"hourly_data_source\"] == 'partial_cems', [\"plant_id_eia\",\"subplant_id\"]].drop_duplicates()\n",
- "subplants_in_pc = test_pc[[\"plant_id_eia\",\"subplant_id\"]].drop_duplicates()\n",
- "pc_overlap = subplants_ided_as_pc.merge(subplants_in_pc, how=\"outer\", on=[\"plant_id_eia\",\"subplant_id\"], indicator=\"source\")\n",
+ "subplants_ided_as_pc = test_eia.loc[\n",
+ " test_eia[\"hourly_data_source\"] == \"partial_cems\", [\"plant_id_eia\", \"subplant_id\"]\n",
+ "].drop_duplicates()\n",
+ "subplants_in_pc = test_pc[[\"plant_id_eia\", \"subplant_id\"]].drop_duplicates()\n",
+ "pc_overlap = subplants_ided_as_pc.merge(\n",
+ " subplants_in_pc, how=\"outer\", on=[\"plant_id_eia\", \"subplant_id\"], indicator=\"source\"\n",
+ ")\n",
"pc_overlap"
]
},
@@ -333,7 +412,7 @@
"metadata": {},
"outputs": [],
"source": [
- "test_cems.loc[test_cems['plant_id_eia'] == 55748, \"net_generation_mwh\"].sum()"
+ "test_cems.loc[test_cems[\"plant_id_eia\"] == 55748, \"net_generation_mwh\"].sum()"
]
},
{
@@ -350,15 +429,53 @@
"outputs": [],
"source": [
"# for plants where there is data reported in cems, see how off it is from data reported in eia\n",
- "cems_plant_monthly = cems.groupby(['plant_id_eia','subplant_id','report_date'], dropna=False).sum()[['gross_generation_mwh','net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_adjusted']].reset_index()\n",
- "gf_plant_monthly = eia923_allocated.groupby(['plant_id_eia','subplant_id','report_date'], dropna=False).sum().reset_index()\n",
- "compare_cems_eia = gf_plant_monthly.merge(cems_plant_monthly, how='inner', on=['plant_id_eia','subplant_id','report_date'], suffixes=(\"_eia\",'_cems'))\n",
- "\n",
+ "cems_plant_monthly = (\n",
+ " cems.groupby([\"plant_id_eia\", \"subplant_id\", \"report_date\"], dropna=False)\n",
+ " .sum()[\n",
+ " [\n",
+ " \"gross_generation_mwh\",\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ]\n",
+ " ]\n",
+ " .reset_index()\n",
+ ")\n",
+ "gf_plant_monthly = (\n",
+ " eia923_allocated.groupby(\n",
+ " [\"plant_id_eia\", \"subplant_id\", \"report_date\"], dropna=False\n",
+ " )\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ ")\n",
+ "compare_cems_eia = gf_plant_monthly.merge(\n",
+ " cems_plant_monthly,\n",
+ " how=\"inner\",\n",
+ " on=[\"plant_id_eia\", \"subplant_id\", \"report_date\"],\n",
+ " suffixes=(\"_eia\", \"_cems\"),\n",
+ ")\n",
"\n",
- "for column in ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_adjusted']:\n",
- " compare_cems_eia[f'{column}_pctdiff'] = ((compare_cems_eia[f'{column}_cems'].replace(0,0.1) - compare_cems_eia[f'{column}_eia'].replace(0,0.1)) / compare_cems_eia[f'{column}_eia'].replace(0,0.1)).round(3)\n",
"\n",
- "compare_cems_eia = compare_cems_eia.set_index(['plant_id_eia','subplant_id','report_date'])\n",
+ "for column in [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ "]:\n",
+ " compare_cems_eia[f\"{column}_pctdiff\"] = (\n",
+ " (\n",
+ " compare_cems_eia[f\"{column}_cems\"].replace(0, 0.1)\n",
+ " - compare_cems_eia[f\"{column}_eia\"].replace(0, 0.1)\n",
+ " )\n",
+ " / compare_cems_eia[f\"{column}_eia\"].replace(0, 0.1)\n",
+ " ).round(3)\n",
+ "\n",
+ "compare_cems_eia = compare_cems_eia.set_index(\n",
+ " [\"plant_id_eia\", \"subplant_id\", \"report_date\"]\n",
+ ")\n",
"compare_cems_eia = compare_cems_eia.reindex(sorted(compare_cems_eia.columns), axis=1)"
]
},
@@ -369,10 +486,10 @@
"outputs": [],
"source": [
"# identify where there are differences between reported CEMS and EIA values for the same subplant-month\n",
- "value = 'net_generation_mwh'\n",
+ "value = \"net_generation_mwh\"\n",
"\n",
- "comparison = compare_cems_eia[[f'{value}_cems', f'{value}_eia', f'{value}_pctdiff']]\n",
- "comparison[(~comparison[f'{value}_pctdiff'].between(-0.05,0.05))]"
+ "comparison = compare_cems_eia[[f\"{value}_cems\", f\"{value}_eia\", f\"{value}_pctdiff\"]]\n",
+ "comparison[(~comparison[f\"{value}_pctdiff\"].between(-0.05, 0.05))]"
]
},
{
@@ -389,10 +506,15 @@
"outputs": [],
"source": [
"# filter the data for which we only have EIA data\n",
- "monthly_eia_data_to_distribute = eia923_allocated[(eia923_allocated['hourly_data_source'] == 'eia') & ~(eia923_allocated['fuel_consumed_mmbtu'].isna())]\n",
+ "monthly_eia_data_to_distribute = eia923_allocated[\n",
+ " (eia923_allocated[\"hourly_data_source\"] == \"eia\")\n",
+ " & ~(eia923_allocated[\"fuel_consumed_mmbtu\"].isna())\n",
+ "]\n",
"\n",
"# assign ba codes to the data\n",
- "monthly_eia_data_to_distribute = assign_ba_code_to_plant(monthly_eia_data_to_distribute, year)\n",
+ "monthly_eia_data_to_distribute = assign_ba_code_to_plant(\n",
+ " monthly_eia_data_to_distribute, year\n",
+ ")\n",
"cems = assign_ba_code_to_plant(cems, year)"
]
},
@@ -406,14 +528,45 @@
"###################################\n",
"\n",
"# Aggregate cems and eia data by plant id, then combine\n",
- "cems_plant_annual = cems.groupby(['ba_code','state','plant_id_eia'], dropna=False).sum()[['net_generation_mwh','fuel_consumed_mmbtu','co2_mass_lb','co2_mass_lb_adjusted']].reset_index()\n",
- "eia_plant_annual = monthly_eia_data_to_distribute.groupby(['ba_code','state','plant_id_eia'], dropna=False).sum()[['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_adjusted']].reset_index()\n",
- "plant_annual_total = pd.concat([cems_plant_annual,eia_plant_annual], axis=0)\n",
+ "cems_plant_annual = (\n",
+ " cems.groupby([\"ba_code\", \"state\", \"plant_id_eia\"], dropna=False)\n",
+ " .sum()[\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ]\n",
+ " ]\n",
+ " .reset_index()\n",
+ ")\n",
+ "eia_plant_annual = (\n",
+ " monthly_eia_data_to_distribute.groupby(\n",
+ " [\"ba_code\", \"state\", \"plant_id_eia\"], dropna=False\n",
+ " )\n",
+ " .sum()[\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ]\n",
+ " ]\n",
+ " .reset_index()\n",
+ ")\n",
+ "plant_annual_total = pd.concat([cems_plant_annual, eia_plant_annual], axis=0)\n",
"# group any plants that have records from both datasets\n",
- "plant_annual_total = plant_annual_total.groupby(['ba_code','state','plant_id_eia'], dropna=False).sum().reset_index()\n",
+ "plant_annual_total = (\n",
+ " plant_annual_total.groupby([\"ba_code\", \"state\", \"plant_id_eia\"], dropna=False)\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ ")\n",
"\n",
"# add a egrid id\n",
- "plant_annual_total = validation.add_egrid_plant_id(plant_annual_total, from_id='eia', to_id='egrid')\n",
+ "plant_annual_total = validation.add_egrid_plant_id(\n",
+ " plant_annual_total, from_id=\"eia\", to_id=\"egrid\"\n",
+ ")\n",
"\n",
"# Load the eGRID plant table\n",
"egrid_plant = validation.load_egrid_plant_file(year)"
@@ -435,14 +588,21 @@
"outputs": [],
"source": [
"# identify any plants that are in egrid but not our totals, and any plants that are in our totals, but not egrid\n",
- "plant_not_in_calc = list(set(egrid_plant['plant_id_eia'].unique()) - set(plant_annual_total['plant_id_eia'].unique()))\n",
+ "plant_not_in_calc = list(\n",
+ " set(egrid_plant[\"plant_id_eia\"].unique())\n",
+ " - set(plant_annual_total[\"plant_id_eia\"].unique())\n",
+ ")\n",
"\n",
"# Which plants are included in eGRID but are missing from our calculations?\n",
- "missing_from_calc = egrid_plant[egrid_plant['plant_id_egrid'].isin(plant_not_in_calc)]\n",
+ "missing_from_calc = egrid_plant[egrid_plant[\"plant_id_egrid\"].isin(plant_not_in_calc)]\n",
"\n",
"# see if any of these plants are retired\n",
- "generators_eia860 = load_data.load_pudl_table('generators_eia860', year=year)\n",
- "missing_from_calc.merge(generators_eia860.groupby('plant_id_eia')['retirement_date'].unique().reset_index(), how='left', on='plant_id_eia')"
+ "generators_eia860 = load_data.load_pudl_table(\"generators_eia860\", year=year)\n",
+ "missing_from_calc.merge(\n",
+ " generators_eia860.groupby(\"plant_id_eia\")[\"retirement_date\"].unique().reset_index(),\n",
+ " how=\"left\",\n",
+ " on=\"plant_id_eia\",\n",
+ ")"
]
},
{
@@ -459,10 +619,17 @@
"outputs": [],
"source": [
"# Which plants are in our calculations, but are missing from eGRID?\n",
- "plants_not_in_egrid = list(set(plant_annual_total['plant_id_egrid'].unique()) - set(egrid_plant['plant_id_egrid'].unique()))\n",
+ "plants_not_in_egrid = list(\n",
+ " set(plant_annual_total[\"plant_id_egrid\"].unique())\n",
+ " - set(egrid_plant[\"plant_id_egrid\"].unique())\n",
+ ")\n",
"\n",
- "plant_names = load_data.load_pudl_table('plants_entity_eia')[['plant_id_eia','plant_name_eia','sector_name_eia']]\n",
- "missing_from_egrid = plant_annual_total[plant_annual_total['plant_id_egrid'].isin(plants_not_in_egrid)].merge(plant_names, how='left', on='plant_id_eia')\n",
+ "plant_names = load_data.load_pudl_table(\"plants_entity_eia\")[\n",
+ " [\"plant_id_eia\", \"plant_name_eia\", \"sector_name_eia\"]\n",
+ "]\n",
+ "missing_from_egrid = plant_annual_total[\n",
+ " plant_annual_total[\"plant_id_egrid\"].isin(plants_not_in_egrid)\n",
+ "].merge(plant_names, how=\"left\", on=\"plant_id_eia\")\n",
"\n",
"missing_from_egrid"
]
@@ -474,7 +641,7 @@
"outputs": [],
"source": [
"# how many of the plants missing from egrid have non-zero data\n",
- "missing_from_egrid[missing_from_egrid['fuel_consumed_mmbtu'] > 1].count()"
+ "missing_from_egrid[missing_from_egrid[\"fuel_consumed_mmbtu\"] > 1].count()"
]
},
{
@@ -491,11 +658,23 @@
"outputs": [],
"source": [
"# identify where there is a single egrid plant id for multiple eia plant ids\n",
- "double_ids = plant_annual_total[plant_annual_total['plant_id_egrid'].duplicated(keep=False)]\n",
- "double_ids = double_ids.groupby('plant_id_egrid').sum()['net_generation_mwh'].reset_index() # focus on net generation for now\n",
+ "double_ids = plant_annual_total[\n",
+ " plant_annual_total[\"plant_id_egrid\"].duplicated(keep=False)\n",
+ "]\n",
+ "double_ids = (\n",
+ " double_ids.groupby(\"plant_id_egrid\").sum()[\"net_generation_mwh\"].reset_index()\n",
+ ") # focus on net generation for now\n",
"# merge the egrid data\n",
- "double_ids = double_ids.merge(egrid_plant[['plant_id_egrid','net_generation_mwh']], how='left', on='plant_id_egrid', suffixes=('_calc','_egrid'))\n",
- "double_ids['percent_diff'] = ((double_ids['net_generation_mwh_calc'] - double_ids['net_generation_mwh_egrid']) / double_ids['net_generation_mwh_egrid']).round(3)\n",
+ "double_ids = double_ids.merge(\n",
+ " egrid_plant[[\"plant_id_egrid\", \"net_generation_mwh\"]],\n",
+ " how=\"left\",\n",
+ " on=\"plant_id_egrid\",\n",
+ " suffixes=(\"_calc\", \"_egrid\"),\n",
+ ")\n",
+ "double_ids[\"percent_diff\"] = (\n",
+ " (double_ids[\"net_generation_mwh_calc\"] - double_ids[\"net_generation_mwh_egrid\"])\n",
+ " / double_ids[\"net_generation_mwh_egrid\"]\n",
+ ").round(3)\n",
"double_ids"
]
},
@@ -512,10 +691,18 @@
"metadata": {},
"outputs": [],
"source": [
- "ba_code_match = egrid_plant.set_index('plant_id_eia')[['plant_name','ba_code']].merge(plant_annual_total.set_index('plant_id_eia')[['ba_code']], how='inner', left_index=True, right_index=True, suffixes=(\"_egrid\",'_calc'))\n",
+ "ba_code_match = egrid_plant.set_index(\"plant_id_eia\")[[\"plant_name\", \"ba_code\"]].merge(\n",
+ " plant_annual_total.set_index(\"plant_id_eia\")[[\"ba_code\"]],\n",
+ " how=\"inner\",\n",
+ " left_index=True,\n",
+ " right_index=True,\n",
+ " suffixes=(\"_egrid\", \"_calc\"),\n",
+ ")\n",
"\n",
"# plants with missing ba code\n",
- "ba_code_match[(ba_code_match['ba_code_calc'].isna()) & ~(ba_code_match['ba_code_egrid'].isna())]"
+ "ba_code_match[\n",
+ " (ba_code_match[\"ba_code_calc\"].isna()) & ~(ba_code_match[\"ba_code_egrid\"].isna())\n",
+ "]"
]
},
{
@@ -525,7 +712,10 @@
"outputs": [],
"source": [
"# plants with incorrect ba code\n",
- "ba_code_match[(ba_code_match['ba_code_calc'] != ba_code_match['ba_code_egrid']) & ~(ba_code_match['ba_code_calc'].isna())]"
+ "ba_code_match[\n",
+ " (ba_code_match[\"ba_code_calc\"] != ba_code_match[\"ba_code_egrid\"])\n",
+ " & ~(ba_code_match[\"ba_code_calc\"].isna())\n",
+ "]"
]
},
{
@@ -598,9 +788,7 @@
") / egrid_eia_comparison[f\"{metric}_eia923\"]\n",
"egrid_eia_comparison.loc[\n",
" egrid_eia_comparison[\"difference\"] == 0, \"percent_difference\"\n",
- "] = 0\n",
- "\n",
- "\n"
+ "] = 0"
]
},
{
@@ -610,13 +798,20 @@
"outputs": [],
"source": [
"# add cems data\n",
- "cems_total = cems.copy()[['plant_id_eia',metric]]\n",
+ "cems_total = cems.copy()[[\"plant_id_eia\", metric]]\n",
"cems_total[\"plant_id_egrid\"] = cems_total[\"plant_id_eia\"]\n",
"cems_total[\"plant_id_egrid\"].update(cems_total[\"plant_id_egrid\"].map(eia_to_egrid_id))\n",
- "cems_total = cems_total.groupby('plant_id_egrid').sum()[metric].reset_index().rename(columns={metric:f\"{metric}_cems\"})\n",
+ "cems_total = (\n",
+ " cems_total.groupby(\"plant_id_egrid\")\n",
+ " .sum()[metric]\n",
+ " .reset_index()\n",
+ " .rename(columns={metric: f\"{metric}_cems\"})\n",
+ ")\n",
"\n",
- "# merge cems data into egrid \n",
- "egrid_eia_comparison = egrid_eia_comparison.merge(cems_total, how='outer', on='plant_id_egrid')"
+ "# merge cems data into egrid\n",
+ "egrid_eia_comparison = egrid_eia_comparison.merge(\n",
+ " cems_total, how=\"outer\", on=\"plant_id_egrid\"\n",
+ ")"
]
},
{
@@ -641,7 +836,7 @@
"metadata": {},
"outputs": [],
"source": [
- "egrid_eia_comparison[egrid_eia_comparison['source'] == 'left_only']"
+ "egrid_eia_comparison[egrid_eia_comparison[\"source\"] == \"left_only\"]"
]
},
{
@@ -651,7 +846,9 @@
"outputs": [],
"source": [
"# egrid seems to be missing fuel consumption data for most nuclear power plants\n",
- "missing_nuclear = egrid_eia_comparison[egrid_eia_comparison['energy_source_code'] == 'NUC']\n",
+ "missing_nuclear = egrid_eia_comparison[\n",
+ " egrid_eia_comparison[\"energy_source_code\"] == \"NUC\"\n",
+ "]\n",
"missing_nuclear.sum()"
]
},
@@ -661,7 +858,7 @@
"metadata": {},
"outputs": [],
"source": [
- "egrid_eia_comparison[(egrid_eia_comparison['percent_difference'] < - 0.01)]"
+ "egrid_eia_comparison[(egrid_eia_comparison[\"percent_difference\"] < -0.01)]"
]
},
{
@@ -671,7 +868,10 @@
"outputs": [],
"source": [
"# where is egrid missing data?\n",
- "egrid_eia_comparison[(egrid_eia_comparison['percent_difference'] < -0.01) & (egrid_eia_comparison['energy_source_code'] != 'NUC')]#.sort_values(by='percent_difference').head(20)"
+ "egrid_eia_comparison[\n",
+ " (egrid_eia_comparison[\"percent_difference\"] < -0.01)\n",
+ " & (egrid_eia_comparison[\"energy_source_code\"] != \"NUC\")\n",
+ "] # .sort_values(by='percent_difference').head(20)"
]
},
{
@@ -682,12 +882,26 @@
"source": [
"# how much emissions does this account for?\n",
"# group by fuel code\n",
- "missing_emissions = egrid_eia_comparison[(egrid_eia_comparison['percent_difference'] < -0.01) & (egrid_eia_comparison['energy_source_code'] != 'NUC')].groupby('energy_source_code').sum().reset_index()\n",
+ "missing_emissions = (\n",
+ " egrid_eia_comparison[\n",
+ " (egrid_eia_comparison[\"percent_difference\"] < -0.01)\n",
+ " & (egrid_eia_comparison[\"energy_source_code\"] != \"NUC\")\n",
+ " ]\n",
+ " .groupby(\"energy_source_code\")\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ ")\n",
"\n",
"# get emission factors\n",
- "emission_factors = load_data.load_ghg_emission_factors()[['energy_source_code', 'co2_lb_per_mmbtu']]\n",
- "missing_emissions = missing_emissions.merge(emission_factors, how='left', on='energy_source_code')\n",
- "missing_emissions['co2_mass_lb'] = missing_emissions['difference'] * missing_emissions['co2_lb_per_mmbtu']\n",
+ "emission_factors = load_data.load_ghg_emission_factors()[\n",
+ " [\"energy_source_code\", \"co2_lb_per_mmbtu\"]\n",
+ "]\n",
+ "missing_emissions = missing_emissions.merge(\n",
+ " emission_factors, how=\"left\", on=\"energy_source_code\"\n",
+ ")\n",
+ "missing_emissions[\"co2_mass_lb\"] = (\n",
+ " missing_emissions[\"difference\"] * missing_emissions[\"co2_lb_per_mmbtu\"]\n",
+ ")\n",
"missing_emissions.sum()"
]
},
@@ -705,67 +919,196 @@
"outputs": [],
"source": [
"# standardize column names and index so that the two dfs can be divided\n",
- "calculated_to_compare = plant_annual_total.groupby('plant_id_egrid').sum().drop(columns=['plant_id_eia'])\n",
+ "calculated_to_compare = (\n",
+ " plant_annual_total.groupby(\"plant_id_egrid\").sum().drop(columns=[\"plant_id_eia\"])\n",
+ ")\n",
"\n",
"# drop the plants that have no data in eGRID\n",
- "plants_with_no_data_in_egrid = list(egrid_plant[egrid_plant[['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_adjusted']].sum(axis=1) == 0]['plant_id_egrid'])\n",
- "egrid_plant = egrid_plant[~egrid_plant['plant_id_eia'].isin(plants_with_no_data_in_egrid)]\n",
+ "plants_with_no_data_in_egrid = list(\n",
+ " egrid_plant[\n",
+ " egrid_plant[\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ]\n",
+ " ].sum(axis=1)\n",
+ " == 0\n",
+ " ][\"plant_id_egrid\"]\n",
+ ")\n",
+ "egrid_plant = egrid_plant[\n",
+ " ~egrid_plant[\"plant_id_eia\"].isin(plants_with_no_data_in_egrid)\n",
+ "]\n",
"\n",
- "egrid_to_compare = egrid_plant.set_index(['plant_id_egrid']).drop(columns=['ba_code','state','plant_name','plant_id_eia'])\n",
+ "egrid_to_compare = egrid_plant.set_index([\"plant_id_egrid\"]).drop(\n",
+ " columns=[\"ba_code\", \"state\", \"plant_name\", \"plant_id_eia\"]\n",
+ ")\n",
"\n",
"# divide calculated value by egrid value\n",
- "compared = calculated_to_compare.div(egrid_to_compare).merge(egrid_plant[['plant_id_egrid','plant_name','ba_code', 'state']], how='left', left_index=True, right_on='plant_id_egrid').set_index('plant_id_egrid')\n",
- "compared['plant_name'] = compared['plant_name'].fillna('unknown')\n",
+ "compared = (\n",
+ " calculated_to_compare.div(egrid_to_compare)\n",
+ " .merge(\n",
+ " egrid_plant[[\"plant_id_egrid\", \"plant_name\", \"ba_code\", \"state\"]],\n",
+ " how=\"left\",\n",
+ " left_index=True,\n",
+ " right_on=\"plant_id_egrid\",\n",
+ " )\n",
+ " .set_index(\"plant_id_egrid\")\n",
+ ")\n",
+ "compared[\"plant_name\"] = compared[\"plant_name\"].fillna(\"unknown\")\n",
"\n",
"# create a dataframe that merges the two sources of data together\n",
- "compared_merged = calculated_to_compare.merge(egrid_to_compare, how='outer', on='plant_id_egrid', suffixes=('_calc','_egrid'))\n",
+ "compared_merged = calculated_to_compare.merge(\n",
+ " egrid_to_compare, how=\"outer\", on=\"plant_id_egrid\", suffixes=(\"_calc\", \"_egrid\")\n",
+ ")\n",
"\n",
"# for each column, change missing values to zero if both values are zero (only nan b/c divide by zero)\n",
- "for col in ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb_adjusted','co2_mass_lb']:\n",
+ "for col in [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ "]:\n",
" # identify plants with zero values for both\n",
- " plant_ids = list(compared_merged[(compared_merged[f'{col}_calc'] == 0) & (compared_merged[f'{col}_egrid'] == 0)].index)\n",
+ " plant_ids = list(\n",
+ " compared_merged[\n",
+ " (compared_merged[f\"{col}_calc\"] == 0)\n",
+ " & (compared_merged[f\"{col}_egrid\"] == 0)\n",
+ " ].index\n",
+ " )\n",
" compared.loc[compared.index.isin(plant_ids), col] = 1\n",
"\n",
"# for each column, categorize the data based on how far it is off from egrid\n",
- "for col in ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb_adjusted','co2_mass_lb']:\n",
+ "for col in [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ "]:\n",
" # add a new column\n",
- " compared[f'{col}_status'] = pd.cut(x=compared[col], \n",
- " bins=[-999999999,0,0.5,0.9,0.99,0.9999,1,1.0001,1.01,1.1,1.5,999999999], \n",
- " labels=['negative','<50%','+/-50%','+/-10%','+/-1%','!exact','!exact','+/-1%','+/-10%','+/-50%','>50%'], \n",
- " ordered=False)\n",
+ " compared[f\"{col}_status\"] = pd.cut(\n",
+ " x=compared[col],\n",
+ " bins=[\n",
+ " -999999999,\n",
+ " 0,\n",
+ " 0.5,\n",
+ " 0.9,\n",
+ " 0.99,\n",
+ " 0.9999,\n",
+ " 1,\n",
+ " 1.0001,\n",
+ " 1.01,\n",
+ " 1.1,\n",
+ " 1.5,\n",
+ " 999999999,\n",
+ " ],\n",
+ " labels=[\n",
+ " \"negative\",\n",
+ " \"<50%\",\n",
+ " \"+/-50%\",\n",
+ " \"+/-10%\",\n",
+ " \"+/-1%\",\n",
+ " \"!exact\",\n",
+ " \"!exact\",\n",
+ " \"+/-1%\",\n",
+ " \"+/-10%\",\n",
+ " \"+/-50%\",\n",
+ " \">50%\",\n",
+ " ],\n",
+ " ordered=False,\n",
+ " )\n",
" # replace any missing values with missing\n",
- " compared[f'{col}_status'] = compared[f'{col}_status'].astype(str) \n",
- " compared[f'{col}_status'] = compared[f'{col}_status'].fillna('missing')\n",
- " compared[f'{col}_status'] = compared[f'{col}_status'].replace('nan','missing')\n",
- " compared.loc[(compared.index.isin(plants_not_in_egrid)),f'{col}_status'] = 'not_in_egrid'\n",
+ " compared[f\"{col}_status\"] = compared[f\"{col}_status\"].astype(str)\n",
+ " compared[f\"{col}_status\"] = compared[f\"{col}_status\"].fillna(\"missing\")\n",
+ " compared[f\"{col}_status\"] = compared[f\"{col}_status\"].replace(\"nan\", \"missing\")\n",
+ " compared.loc[\n",
+ " (compared.index.isin(plants_not_in_egrid)), f\"{col}_status\"\n",
+ " ] = \"not_in_egrid\"\n",
"\n",
"# identify which plants are missing from egrid vs calculated values\n",
- "for col in ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb_adjusted','co2_mass_lb']:\n",
+ "for col in [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ "]:\n",
" # identify plants that are missing in egrid\n",
- " plants_missing_egrid = list(compared_merged[(compared_merged[f'{col}_calc'] > 0) & (compared_merged[f'{col}_egrid'].isna())].index)\n",
- " compared.loc[compared.index.isin(plants_missing_egrid), f'{col}_status'] = 'missing_in_egrid'\n",
+ " plants_missing_egrid = list(\n",
+ " compared_merged[\n",
+ " (compared_merged[f\"{col}_calc\"] > 0)\n",
+ " & (compared_merged[f\"{col}_egrid\"].isna())\n",
+ " ].index\n",
+ " )\n",
+ " compared.loc[\n",
+ " compared.index.isin(plants_missing_egrid), f\"{col}_status\"\n",
+ " ] = \"missing_in_egrid\"\n",
" # identify plants that are missing from our calculations\n",
- " plants_missing_calc = list(compared_merged[(compared_merged[f'{col}_calc'].isna()) & (compared_merged[f'{col}_egrid'] > 0)].index)\n",
- " compared.loc[compared.index.isin(plants_missing_calc), f'{col}_status'] = 'missing_in_calc'\n",
+ " plants_missing_calc = list(\n",
+ " compared_merged[\n",
+ " (compared_merged[f\"{col}_calc\"].isna())\n",
+ " & (compared_merged[f\"{col}_egrid\"] > 0)\n",
+ " ].index\n",
+ " )\n",
+ " compared.loc[\n",
+ " compared.index.isin(plants_missing_calc), f\"{col}_status\"\n",
+ " ] = \"missing_in_calc\"\n",
" # identify where our calculations are missing a zero value\n",
- " plants_missing_zero_calc = list(compared_merged[(compared_merged[f'{col}_calc'].isna()) & (compared_merged[f'{col}_egrid'] == 0)].index)\n",
- " compared.loc[compared.index.isin(plants_missing_zero_calc), f'{col}_status'] = 'missing_zero_in_calc'\n",
+ " plants_missing_zero_calc = list(\n",
+ " compared_merged[\n",
+ " (compared_merged[f\"{col}_calc\"].isna())\n",
+ " & (compared_merged[f\"{col}_egrid\"] == 0)\n",
+ " ].index\n",
+ " )\n",
+ " compared.loc[\n",
+ " compared.index.isin(plants_missing_zero_calc), f\"{col}_status\"\n",
+ " ] = \"missing_zero_in_calc\"\n",
" # identify where egrid has a missing value instead of a zero\n",
- " plants_missing_zero_egrid = list(compared_merged[(compared_merged[f'{col}_calc'] == 0) & (compared_merged[f'{col}_egrid'].isna())].index)\n",
- " compared.loc[compared.index.isin(plants_missing_zero_egrid), f'{col}_status'] = 'missing_zero_in_egrid'\n",
+ " plants_missing_zero_egrid = list(\n",
+ " compared_merged[\n",
+ " (compared_merged[f\"{col}_calc\"] == 0)\n",
+ " & (compared_merged[f\"{col}_egrid\"].isna())\n",
+ " ].index\n",
+ " )\n",
+ " compared.loc[\n",
+ " compared.index.isin(plants_missing_zero_egrid), f\"{col}_status\"\n",
+ " ] = \"missing_zero_in_egrid\"\n",
" # identify where egrid has a zero value where we have a positive value\n",
- " plants_incorrect_zero_egrid = list(compared_merged[(compared_merged[f'{col}_calc'] > 0) & (compared_merged[f'{col}_egrid'] == 0)].index)\n",
- " compared.loc[compared.index.isin(plants_incorrect_zero_egrid), f'{col}_status'] = '>50%'\n",
+ " plants_incorrect_zero_egrid = list(\n",
+ " compared_merged[\n",
+ " (compared_merged[f\"{col}_calc\"] > 0)\n",
+ " & (compared_merged[f\"{col}_egrid\"] == 0)\n",
+ " ].index\n",
+ " )\n",
+ " compared.loc[\n",
+ " compared.index.isin(plants_incorrect_zero_egrid), f\"{col}_status\"\n",
+ " ] = \">50%\"\n",
"\n",
"# create a dataframe that counts how many plants are in each category\n",
"comparison_count = []\n",
- "for col in ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb_adjusted','co2_mass_lb']:\n",
- " count = compared.groupby(f'{col}_status', dropna=False).count()['plant_name'].rename(col)\n",
- " count.index = count.index.rename('status')\n",
+ "for col in [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ "]:\n",
+ " count = (\n",
+ " compared.groupby(f\"{col}_status\", dropna=False)\n",
+ " .count()[\"plant_name\"]\n",
+ " .rename(col)\n",
+ " )\n",
+ " count.index = count.index.rename(\"status\")\n",
" comparison_count.append(count)\n",
"\n",
"comparison_count = pd.concat(comparison_count, axis=1).fillna(0).astype(int)\n",
- "comparison_count = pd.concat([comparison_count, pd.DataFrame(comparison_count.sum().rename('Total')).T], axis=0) \n",
+ "comparison_count = pd.concat(\n",
+ " [comparison_count, pd.DataFrame(comparison_count.sum().rename(\"Total\")).T], axis=0\n",
+ ")\n",
"comparison_count"
]
},
@@ -800,28 +1143,59 @@
" 'BANGENAN':'net_generation_mwh',\n",
" 'BACO2AN':'co2_mass_lb'})\"\"\"\n",
"\n",
- "data_columns = ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_adjusted']\n",
+ "data_columns = [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ "]\n",
"\n",
- "#aggregate the plant data up to the BA level\n",
- "egrid_ba = egrid_plant.groupby(['ba_code']).sum()[data_columns].reset_index()\n",
+ "# aggregate the plant data up to the BA level\n",
+ "egrid_ba = egrid_plant.groupby([\"ba_code\"]).sum()[data_columns].reset_index()\n",
"\n",
"# divide our calculation by the BA totals from eGRID\n",
"# if there are 0 values, replace with 0.1, so that div by zero doesn't return missing value\n",
- "ba_metric = plant_annual_total.groupby('ba_code', dropna=False).sum().drop(columns='plant_id_eia').replace(0,0.1).div(egrid_ba.set_index('ba_code').replace(0,0.1)).sort_values(by='co2_mass_lb').round(3)\n",
+ "ba_metric = (\n",
+ " plant_annual_total.groupby(\"ba_code\", dropna=False)\n",
+ " .sum()\n",
+ " .drop(columns=\"plant_id_eia\")\n",
+ " .replace(0, 0.1)\n",
+ " .div(egrid_ba.set_index(\"ba_code\").replace(0, 0.1))\n",
+ " .sort_values(by=\"co2_mass_lb\")\n",
+ " .round(3)\n",
+ ")\n",
"\n",
- "total = pd.DataFrame(plant_annual_total[data_columns].sum().div(egrid_ba[data_columns].sum()).rename('Total')).T\n",
+ "total = pd.DataFrame(\n",
+ " plant_annual_total[data_columns]\n",
+ " .sum()\n",
+ " .div(egrid_ba[data_columns].sum())\n",
+ " .rename(\"Total\")\n",
+ ").T\n",
"\n",
"# calculate the difference in the number of plants in each region\n",
- "plant_count = (plant_annual_total.groupby('ba_code', dropna=False).count()['plant_id_egrid'] - egrid_plant.groupby('ba_code', dropna=False).count()['plant_id_egrid']).rename('num_plants')\n",
- "ba_metric = ba_metric.merge(plant_count, how='left', left_index=True, right_index=True).drop(columns=['plant_id_egrid']).sort_index()\n",
+ "plant_count = (\n",
+ " plant_annual_total.groupby(\"ba_code\", dropna=False).count()[\"plant_id_egrid\"]\n",
+ " - egrid_plant.groupby(\"ba_code\", dropna=False).count()[\"plant_id_egrid\"]\n",
+ ").rename(\"num_plants\")\n",
+ "ba_metric = (\n",
+ " ba_metric.merge(plant_count, how=\"left\", left_index=True, right_index=True)\n",
+ " .drop(columns=[\"plant_id_egrid\"])\n",
+ " .sort_index()\n",
+ ")\n",
"\n",
- "ba_metric = pd.concat([ba_metric, total], axis=0).round(2) \n",
+ "ba_metric = pd.concat([ba_metric, total], axis=0).round(2)\n",
"\n",
- "ba_metric = ba_metric[data_columns + ['num_plants']]\n",
+ "ba_metric = ba_metric[data_columns + [\"num_plants\"]]\n",
"\n",
- "columns_to_check = ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb']\n",
+ "columns_to_check = [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ "]\n",
"\n",
- "with pd.option_context('display.max_rows', None, 'display.max_columns', None):\n",
+ "with pd.option_context(\"display.max_rows\", None, \"display.max_columns\", None):\n",
" display(ba_metric[~(ba_metric[columns_to_check] == 1).all(axis=1)])"
]
},
@@ -832,10 +1206,17 @@
"outputs": [],
"source": [
"# how much co2 comes from CEMS vs EIA\n",
- "data_source_by_ba = pd.pivot_table(eia923_allocated, values='co2_mass_lb', index='ba_code', columns='data_source', dropna=False, aggfunc=np.sum).replace(0,0.001)\n",
+ "data_source_by_ba = pd.pivot_table(\n",
+ " eia923_allocated,\n",
+ " values=\"co2_mass_lb\",\n",
+ " index=\"ba_code\",\n",
+ " columns=\"data_source\",\n",
+ " dropna=False,\n",
+ " aggfunc=np.sum,\n",
+ ").replace(0, 0.001)\n",
"data_source_by_ba = data_source_by_ba.div(data_source_by_ba.sum(axis=1), axis=0)\n",
- "with pd.option_context('display.max_rows', None, 'display.max_columns', None):\n",
- " display(data_source_by_ba.round(3).fillna(0).sort_values(by='cems'))"
+ "with pd.option_context(\"display.max_rows\", None, \"display.max_columns\", None):\n",
+ " display(data_source_by_ba.round(3).fillna(0).sort_values(by=\"cems\"))"
]
},
{
@@ -889,12 +1270,12 @@
"outputs": [],
"source": [
"# examine specific plants in a category\n",
- "value = 'fuel_consumed_mmbtu'\n",
- "status = '>50%'\n",
+ "value = \"fuel_consumed_mmbtu\"\n",
+ "status = \">50%\"\n",
"\n",
- "#compared_merged.loc[64877,:]\n",
+ "# compared_merged.loc[64877,:]\n",
"\n",
- "compared[compared[f'{value}_status'] == status].sort_values(by=value)"
+ "compared[compared[f\"{value}_status\"] == status].sort_values(by=value)"
]
},
{
@@ -903,7 +1284,7 @@
"metadata": {},
"outputs": [],
"source": [
- "compared[compared[f'{value}_status'] == status].sort_values(by=value).sample(10)"
+ "compared[compared[f\"{value}_status\"] == status].sort_values(by=value).sample(10)"
]
},
{
@@ -921,7 +1302,7 @@
"metadata": {},
"outputs": [],
"source": [
- "egrid_plant[egrid_plant['plant_id_eia'] == plant_to_explore]"
+ "egrid_plant[egrid_plant[\"plant_id_eia\"] == plant_to_explore]"
]
},
{
@@ -930,7 +1311,7 @@
"metadata": {},
"outputs": [],
"source": [
- "plant_annual_total[plant_annual_total['plant_id_eia'] == plant_to_explore]"
+ "plant_annual_total[plant_annual_total[\"plant_id_eia\"] == plant_to_explore]"
]
},
{
@@ -939,7 +1320,7 @@
"metadata": {},
"outputs": [],
"source": [
- "eia923_allocated[eia923_allocated['plant_id_eia'] == plant_to_explore].sum()"
+ "eia923_allocated[eia923_allocated[\"plant_id_eia\"] == plant_to_explore].sum()"
]
},
{
@@ -948,7 +1329,11 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_unit_monthly = cems.groupby(['plant_id_eia','emissions_unit_id_epa','report_date']).sum().reset_index()\n"
+ "cems_unit_monthly = (\n",
+ " cems.groupby([\"plant_id_eia\", \"emissions_unit_id_epa\", \"report_date\"])\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ ")"
]
},
{
@@ -957,7 +1342,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_unit_monthly[cems_unit_monthly['plant_id_eia'] == plant_to_explore].sum()"
+ "cems_unit_monthly[cems_unit_monthly[\"plant_id_eia\"] == plant_to_explore].sum()"
]
},
{
@@ -966,7 +1351,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems[(cems['plant_id_eia'] == plant_to_explore) & (cems['operating_time_hours'] > 0)]"
+ "cems[(cems[\"plant_id_eia\"] == plant_to_explore) & (cems[\"operating_time_hours\"] > 0)]"
]
},
{
@@ -975,7 +1360,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_plant_annual[cems_plant_annual['plant_id_eia'] == plant_to_explore]"
+ "cems_plant_annual[cems_plant_annual[\"plant_id_eia\"] == plant_to_explore]"
]
},
{
@@ -986,7 +1371,7 @@
"source": [
"# there are some plants that report heat input and co2 in CEMS, but are missing net generation data\n",
"# TODO: we should maybe try and fill net generation data using EIA-923?\n",
- "cems_plant_annual[cems_plant_annual['net_generation_mwh'] == 0]"
+ "cems_plant_annual[cems_plant_annual[\"net_generation_mwh\"] == 0]"
]
},
{
@@ -1004,8 +1389,16 @@
"metadata": {},
"outputs": [],
"source": [
- "ba_plant_match = plant_annual_total[plant_annual_total['ba_code'].isna()].merge(egrid_plant[egrid_plant['ba_code'].isna()], how='left', on='plant_id_egrid', suffixes=(\"_calc\",'_egrid'))\n",
- "ba_plant_match[ba_plant_match['net_generation_mwh_calc'].round(0) != ba_plant_match['net_generation_mwh_egrid'].round(0)]"
+ "ba_plant_match = plant_annual_total[plant_annual_total[\"ba_code\"].isna()].merge(\n",
+ " egrid_plant[egrid_plant[\"ba_code\"].isna()],\n",
+ " how=\"left\",\n",
+ " on=\"plant_id_egrid\",\n",
+ " suffixes=(\"_calc\", \"_egrid\"),\n",
+ ")\n",
+ "ba_plant_match[\n",
+ " ba_plant_match[\"net_generation_mwh_calc\"].round(0)\n",
+ " != ba_plant_match[\"net_generation_mwh_egrid\"].round(0)\n",
+ "]"
]
},
{
@@ -1014,7 +1407,7 @@
"metadata": {},
"outputs": [],
"source": [
- "egrid_plant[egrid_plant['ba_code'] == 'CPLE']"
+ "egrid_plant[egrid_plant[\"ba_code\"] == \"CPLE\"]"
]
},
{
@@ -1023,7 +1416,9 @@
"metadata": {},
"outputs": [],
"source": [
- "compare_plants_in_ba = egrid_plant.merge(plant_annual_total, how='outer', on='plant_id_egrid', suffixes=('_egrid','_calc'))"
+ "compare_plants_in_ba = egrid_plant.merge(\n",
+ " plant_annual_total, how=\"outer\", on=\"plant_id_egrid\", suffixes=(\"_egrid\", \"_calc\")\n",
+ ")"
]
},
{
@@ -1032,10 +1427,19 @@
"metadata": {},
"outputs": [],
"source": [
- "ba = 'CPLE'\n",
- "metric = 'fuel_consumed_mmbtu'\n",
+ "ba = \"CPLE\"\n",
+ "metric = \"fuel_consumed_mmbtu\"\n",
"\n",
- "compare_plants_in_ba[((compare_plants_in_ba['ba_code_egrid'] == ba) | (compare_plants_in_ba['ba_code_calc'] == ba)) & (compare_plants_in_ba[f'{metric}_egrid'].round(0) != compare_plants_in_ba[f'{metric}_calc'].round(0))]"
+ "compare_plants_in_ba[\n",
+ " (\n",
+ " (compare_plants_in_ba[\"ba_code_egrid\"] == ba)\n",
+ " | (compare_plants_in_ba[\"ba_code_calc\"] == ba)\n",
+ " )\n",
+ " & (\n",
+ " compare_plants_in_ba[f\"{metric}_egrid\"].round(0)\n",
+ " != compare_plants_in_ba[f\"{metric}_calc\"].round(0)\n",
+ " )\n",
+ "]"
]
},
{
@@ -1044,7 +1448,7 @@
"metadata": {},
"outputs": [],
"source": [
- "plant_annual_total[plant_annual_total['ba_code'] == 'AMPL']"
+ "plant_annual_total[plant_annual_total[\"ba_code\"] == \"AMPL\"]"
]
}
],
diff --git a/notebooks/validation/diff_output_versions.ipynb b/notebooks/validation/diff_output_versions.ipynb
index b6c8d200..7ba28be0 100644
--- a/notebooks/validation/diff_output_versions.ipynb
+++ b/notebooks/validation/diff_output_versions.ipynb
@@ -16,7 +16,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import load_data\n",
"from column_checks import get_dtypes\n",
@@ -61,30 +62,62 @@
"# unzip archived data\n",
"if not os.path.exists(data_folder(\"diff\")):\n",
" os.mkdir(data_folder(\"diff\"))\n",
- "with zipfile.ZipFile(data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\") as zip_to_unzip:\n",
- " zip_to_unzip.extractall(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\"))\n",
+ "with zipfile.ZipFile(\n",
+ " data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\"\n",
+ ") as zip_to_unzip:\n",
+ " zip_to_unzip.extractall(\n",
+ " data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\")\n",
+ " )\n",
"\n",
"# load archived data\n",
- "prev_data = pd.read_csv(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/plant_data.csv\"), dtype=get_dtypes()).round(0)\n",
+ "prev_data = pd.read_csv(\n",
+ " data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/plant_data.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ ").round(0)\n",
"\n",
"# load new data\n",
- "new_data = pd.read_csv(results_folder(f\"{year}/{data_type}/{resolution}/us_units/plant_data.csv\"), dtype=get_dtypes()).round(0)\n",
+ "new_data = pd.read_csv(\n",
+ " results_folder(f\"{year}/{data_type}/{resolution}/us_units/plant_data.csv\"),\n",
+ " dtype=get_dtypes(),\n",
+ ").round(0)\n",
"\n",
"# load plant attributes\n",
- "plant_attributes = pd.read_csv(outputs_folder(f\"{year}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())\n",
+ "plant_attributes = pd.read_csv(\n",
+ " outputs_folder(f\"{year}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes()\n",
+ ")\n",
"\n",
- "prev_data = prev_data.merge(plant_attributes[[\"plant_id_eia\",\"ba_code\",\"fuel_category\"]], how=\"left\", on=\"plant_id_eia\")\n",
- "new_data = new_data.merge(plant_attributes[[\"plant_id_eia\",\"ba_code\",\"fuel_category\"]], how=\"left\", on=\"plant_id_eia\")\n",
+ "prev_data = prev_data.merge(\n",
+ " plant_attributes[[\"plant_id_eia\", \"ba_code\", \"fuel_category\"]],\n",
+ " how=\"left\",\n",
+ " on=\"plant_id_eia\",\n",
+ ")\n",
+ "new_data = new_data.merge(\n",
+ " plant_attributes[[\"plant_id_eia\", \"ba_code\", \"fuel_category\"]],\n",
+ " how=\"left\",\n",
+ " on=\"plant_id_eia\",\n",
+ ")\n",
"\n",
- "key_cols = [\"plant_id_eia\",\"ba_code\",\"fuel_category\"]\n",
- "comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n",
+ "key_cols = [\"plant_id_eia\", \"ba_code\", \"fuel_category\"]\n",
+ "comparison = prev_data.set_index(key_cols).compare(\n",
+ " new_data.set_index(key_cols), result_names=(\"previous\", \"new\")\n",
+ ")\n",
"\n",
"# get difference\n",
- "diff = comparison.groupby(level=0, axis=1).diff().rename(columns={\"new\":\"pct_diff\"}).drop(columns=[\"previous\"], level=1)\n",
- "comparison = pd.concat([comparison, diff], axis=1).sort_index(axis=1, level=0, ascending=True, sort_remaining=False)\n",
- "comparison.iloc[:, comparison.columns.get_level_values(1)=='pct_diff'] = (comparison.iloc[:, comparison.columns.get_level_values(1)=='pct_diff'].values / comparison.iloc[:, comparison.columns.get_level_values(1)=='previous'].values).round(2)\n",
+ "diff = (\n",
+ " comparison.groupby(level=0, axis=1)\n",
+ " .diff()\n",
+ " .rename(columns={\"new\": \"pct_diff\"})\n",
+ " .drop(columns=[\"previous\"], level=1)\n",
+ ")\n",
+ "comparison = pd.concat([comparison, diff], axis=1).sort_index(\n",
+ " axis=1, level=0, ascending=True, sort_remaining=False\n",
+ ")\n",
+ "comparison.iloc[:, comparison.columns.get_level_values(1) == \"pct_diff\"] = (\n",
+ " comparison.iloc[:, comparison.columns.get_level_values(1) == \"pct_diff\"].values\n",
+ " / comparison.iloc[:, comparison.columns.get_level_values(1) == \"previous\"].values\n",
+ ").round(2)\n",
"\n",
- "comparison\n"
+ "comparison"
]
},
{
@@ -93,7 +126,9 @@
"metadata": {},
"outputs": [],
"source": [
- "comparison[comparison.loc[:,(\"co2_mass_lb_for_electricity\",\"pct_diff\")] > 0.001]#.groupby(\"ba_code\").sum().sum()"
+ "comparison[\n",
+ " comparison.loc[:, (\"co2_mass_lb_for_electricity\", \"pct_diff\")] > 0.001\n",
+ "] # .groupby(\"ba_code\").sum().sum()"
]
},
{
@@ -117,13 +152,20 @@
"# unzip archived data\n",
"if not os.path.exists(data_folder(\"diff\")):\n",
" os.mkdir(data_folder(\"diff\"))\n",
- "with zipfile.ZipFile(data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\") as zip_to_unzip:\n",
- " zip_to_unzip.extractall(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\"))\n",
+ "with zipfile.ZipFile(\n",
+ " data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\"\n",
+ ") as zip_to_unzip:\n",
+ " zip_to_unzip.extractall(\n",
+ " data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\")\n",
+ " )\n",
"\n",
"# load archived data\n",
"prev_data = []\n",
"for ba in os.listdir(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\")):\n",
- " df = pd.read_csv(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/{ba}\"), dtype=get_dtypes())\n",
+ " df = pd.read_csv(\n",
+ " data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/{ba}\"),\n",
+ " dtype=get_dtypes(),\n",
+ " )\n",
" df[\"ba_code\"] = ba.split(\".\")[0]\n",
" prev_data.append(df)\n",
"\n",
@@ -132,15 +174,20 @@
"# load data\n",
"new_data = []\n",
"for ba in os.listdir(results_folder(f\"{year}/{data_type}/{resolution}/us_units\")):\n",
- " df = pd.read_csv(results_folder(f\"{year}/{data_type}/{resolution}/us_units/{ba}\"), dtype=get_dtypes())\n",
+ " df = pd.read_csv(\n",
+ " results_folder(f\"{year}/{data_type}/{resolution}/us_units/{ba}\"),\n",
+ " dtype=get_dtypes(),\n",
+ " )\n",
" df[\"ba_code\"] = ba.split(\".\")[0]\n",
" new_data.append(df)\n",
"\n",
"new_data = pd.concat(new_data, axis=0).reset_index(drop=True)\n",
"\n",
"key_cols = [\"ba_code\", \"fuel_category\"]\n",
- "comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n",
- "comparison\n"
+ "comparison = prev_data.set_index(key_cols).compare(\n",
+ " new_data.set_index(key_cols), result_names=(\"previous\", \"new\")\n",
+ ")\n",
+ "comparison"
]
},
{
@@ -159,22 +206,28 @@
"source": [
"# load archived data\n",
"file = \"cems_cleaned\"\n",
- "key_cols = [\"plant_id_eia\",\"emissions_unit_id_epa\",\"datetime_utc\"]\n",
+ "key_cols = [\"plant_id_eia\", \"emissions_unit_id_epa\", \"datetime_utc\"]\n",
"\n",
"# unzip archived data\n",
"if not os.path.exists(data_folder(f\"diff/outputs_{year}\")):\n",
" os.mkdir(data_folder(f\"diff/outputs_{year}\"))\n",
- " with zipfile.ZipFile(data_folder(f\"zenodo/outputs_{year}.zip\"), \"r\") as zip_to_unzip:\n",
+ " with zipfile.ZipFile(\n",
+ " data_folder(f\"zenodo/outputs_{year}.zip\"), \"r\"\n",
+ " ) as zip_to_unzip:\n",
" zip_to_unzip.extractall(data_folder(f\"diff/outputs_{year}\"))\n",
"\n",
"# load archived data\n",
- "prev_data = pd.read_csv(data_folder(f\"diff/outputs_{year}/{file}_{year}.csv\"), dtype=get_dtypes())\n",
+ "prev_data = pd.read_csv(\n",
+ " data_folder(f\"diff/outputs_{year}/{file}_{year}.csv\"), dtype=get_dtypes()\n",
+ ")\n",
"\n",
"# load new data\n",
"new_data = pd.read_csv(outputs_folder(f\"{year}/{file}_{year}.csv\"), dtype=get_dtypes())\n",
"\n",
- "comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n",
- "comparison\n"
+ "comparison = prev_data.set_index(key_cols).compare(\n",
+ " new_data.set_index(key_cols), result_names=(\"previous\", \"new\")\n",
+ ")\n",
+ "comparison"
]
}
],
diff --git a/notebooks/validation/hourly_validation.ipynb b/notebooks/validation/hourly_validation.ipynb
index a52c7a73..c5c1d9d0 100644
--- a/notebooks/validation/hourly_validation.ipynb
+++ b/notebooks/validation/hourly_validation.ipynb
@@ -35,6 +35,7 @@
"outputs": [],
"source": [
"import sys\n",
+ "\n",
"sys.path.append(\"../../src\")\n",
"\n",
"import filepaths"
@@ -56,7 +57,11 @@
"outputs": [],
"source": [
"# EIA-930 data after timestamp adjustments but no cleaning\n",
- "raw = pd.read_csv(f\"{filepaths.data_folder()}/outputs/2020/eia930/eia930_raw.csv\", index_col=0, parse_dates=True)"
+ "raw = pd.read_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/2020/eia930/eia930_raw.csv\",\n",
+ " index_col=0,\n",
+ " parse_dates=True,\n",
+ ")"
]
},
{
@@ -74,15 +79,15 @@
" ba = ba_f.replace(\".csv\", \"\")\n",
" print(ba, end=\"...\")\n",
" col_name = GEN_ID.format(ba)\n",
- " if col_name not in raw.columns: \n",
+ " if col_name not in raw.columns:\n",
" continue\n",
" else:\n",
- " dat = pd.read_csv(path+ba_f, parse_dates=[\"datetime_utc\"])\n",
- " dat = dat[dat.fuel_category==\"total\"]\n",
- " dat = dat.merge(raw[ col_name], left_on=\"datetime_utc\", right_index=True)\n",
- " c = dat[[\"net_generation_mwh\", col_name]].corr().to_numpy()[0,1]\n",
+ " dat = pd.read_csv(path + ba_f, parse_dates=[\"datetime_utc\"])\n",
+ " dat = dat[dat.fuel_category == \"total\"]\n",
+ " dat = dat.merge(raw[col_name], left_on=\"datetime_utc\", right_index=True)\n",
+ " c = dat[[\"net_generation_mwh\", col_name]].corr().to_numpy()[0, 1]\n",
" cors[ba] = c\n",
- " difs = (dat[col_name]-dat[\"net_generation_mwh\"])/dat[\"net_generation_mwh\"]\n",
+ " difs = (dat[col_name] - dat[\"net_generation_mwh\"]) / dat[\"net_generation_mwh\"]\n",
" difs = difs.replace(np.inf, np.nan)\n",
" percent_difs[ba] = difs.median()\n",
" annual_gen[ba] = dat[\"net_generation_mwh\"].sum()"
@@ -94,11 +99,22 @@
"metadata": {},
"outputs": [],
"source": [
- "os.makedirs(f\"{filepaths.data_folder()}/outputs/{year}/validation_metrics/us_units\", exist_ok=True)\n",
+ "os.makedirs(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation_metrics/us_units\",\n",
+ " exist_ok=True,\n",
+ ")\n",
"\n",
- "out = pd.DataFrame(data={\"Difference as percent of hourly-egrid\":percent_difs, \"Correlation\":cors, \"Annual BA generation\":annual_gen})\n",
+ "out = pd.DataFrame(\n",
+ " data={\n",
+ " \"Difference as percent of hourly-egrid\": percent_difs,\n",
+ " \"Correlation\": cors,\n",
+ " \"Annual BA generation\": annual_gen,\n",
+ " }\n",
+ ")\n",
"out = out.sort_values(\"Annual BA generation\", ascending=False)\n",
- "out.to_csv(f\"{filepaths.data_folder()}/outputs/{year}/validation_metrics/us_units/compare_930_hourlyegrid.csv\")"
+ "out.to_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation_metrics/us_units/compare_930_hourlyegrid.csv\"\n",
+ ")"
]
},
{
@@ -116,9 +132,9 @@
"source": [
"ba = \"NYIS\"\n",
"col_name = GEN_ID.format(ba)\n",
- "dat = pd.read_csv(path+ba+\".csv\", parse_dates=[\"datetime_utc\"])\n",
- "dat = dat[dat.fuel_category==\"total\"]\n",
- "dat = dat.merge(raw[ col_name], left_on=\"datetime_utc\", right_index=True)\n",
+ "dat = pd.read_csv(path + ba + \".csv\", parse_dates=[\"datetime_utc\"])\n",
+ "dat = dat[dat.fuel_category == \"total\"]\n",
+ "dat = dat.merge(raw[col_name], left_on=\"datetime_utc\", right_index=True)\n",
"\n",
"px.line(dat, x=\"datetime_utc\", y=[\"net_generation_mwh\", col_name])"
]
@@ -136,7 +152,11 @@
"metadata": {},
"outputs": [],
"source": [
- "eia930 = pd.read_csv(f\"{filepaths.data_folder()}/outputs/{year}/eia930/eia930_rolling.csv\", parse_dates=True, index_col=0)"
+ "eia930 = pd.read_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/eia930/eia930_rolling.csv\",\n",
+ " parse_dates=True,\n",
+ " index_col=0,\n",
+ ")"
]
},
{
@@ -148,21 +168,21 @@
"## Load factors from Singularity API\n",
"\n",
"# Use last year's egrid because that's all we have in real time\n",
- "# TODO: could expand to other pollutants if we use eGRID download \n",
- "url = f\"https://api.singularity.energy/v1/emissions/\" \n",
- "egrid_year = str(year-1) # use last year as eGRID year\n",
+ "# TODO: could expand to other pollutants if we use eGRID download\n",
+ "url = f\"https://api.singularity.energy/v1/emissions/\"\n",
+ "egrid_year = str(year - 1) # use last year as eGRID year\n",
"\n",
"headers = {\n",
- " 'X-Api-Key': os.environ['SINGULARITY_API_KEY'],\n",
+ " \"X-Api-Key\": os.environ[\"SINGULARITY_API_KEY\"],\n",
"}\n",
"\n",
"factors = {}\n",
"\n",
- "for adjustment in [\"adjusted\", \"unadjusted\"]: \n",
+ "for adjustment in [\"adjusted\", \"unadjusted\"]:\n",
" adjusted = adjustment == \"adjusted\"\n",
" key = f\"EGRID_{egrid_year}\" if adjusted else f\"EGRID_u{egrid_year}\"\n",
- " response = requests.request(\"GET\", url+key, headers=headers)\n",
- " factors[adjustment] = json.loads(response.content)[\"data\"]\n"
+ " response = requests.request(\"GET\", url + key, headers=headers)\n",
+ " factors[adjustment] = json.loads(response.content)[\"data\"]"
]
},
{
@@ -171,7 +191,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Default factors: coal factor is missing in FPC, PACW; so need national factor \n",
+ "# Default factors: coal factor is missing in FPC, PACW; so need national factor\n",
"default_factors = {}\n",
"default_factors[\"adjusted\"] = {}\n",
"default_factors[\"unadjusted\"] = {}\n",
@@ -195,13 +215,13 @@
"outputs": [],
"source": [
"EIA_REGIONS = {\n",
- " 'BPAT',\n",
- " 'CISO',\n",
- " 'ISNE',\n",
- " 'MISO',\n",
- " 'NYIS',\n",
- " 'PJM',\n",
- " 'SWPP',\n",
+ " \"BPAT\",\n",
+ " \"CISO\",\n",
+ " \"ISNE\",\n",
+ " \"MISO\",\n",
+ " \"NYIS\",\n",
+ " \"PJM\",\n",
+ " \"SWPP\",\n",
"}"
]
},
@@ -211,18 +231,23 @@
"metadata": {},
"outputs": [],
"source": [
- "## For each BA, use singularity factors to calculate emission rate \n",
- "bas_to_calc = [ba.replace(\".csv\", \"\") for ba in os.listdir(f\"{filepaths.results_folder()}/2020/power_sector_data/hourly/us_units/\")]\n",
+ "## For each BA, use singularity factors to calculate emission rate\n",
+ "bas_to_calc = [\n",
+ " ba.replace(\".csv\", \"\")\n",
+ " for ba in os.listdir(\n",
+ " f\"{filepaths.results_folder()}/2020/power_sector_data/hourly/us_units/\"\n",
+ " )\n",
+ "]\n",
"\n",
"fuel_categories = {\n",
- " \"coal\":\"COL\",\n",
- " \"natural_gas\":\"NG\",\n",
- " \"other\":\"OTH\",\n",
- " \"hydro\":\"WAT\",\n",
- " \"wind\":\"WND\",\n",
- " \"solar\":\"SUN\",\n",
- " \"nuclear\":\"NUC\",\n",
- " \"petroleum\":\"OIL\"\n",
+ " \"coal\": \"COL\",\n",
+ " \"natural_gas\": \"NG\",\n",
+ " \"other\": \"OTH\",\n",
+ " \"hydro\": \"WAT\",\n",
+ " \"wind\": \"WND\",\n",
+ " \"solar\": \"SUN\",\n",
+ " \"nuclear\": \"NUC\",\n",
+ " \"petroleum\": \"OIL\",\n",
"}\n",
"\n",
"for ba in bas_to_calc:\n",
@@ -231,24 +256,41 @@
" print(f\"missing ba {singularity_ba}\")\n",
" continue\n",
"\n",
- " out = pd.DataFrame(index=eia930.index, columns=[\"adjusted_carbon\",\"unajusted_carbon\", \"adjusted_rate\", \"unadjusted_rate\"])\n",
+ " out = pd.DataFrame(\n",
+ " index=eia930.index,\n",
+ " columns=[\n",
+ " \"adjusted_carbon\",\n",
+ " \"unajusted_carbon\",\n",
+ " \"adjusted_rate\",\n",
+ " \"unadjusted_rate\",\n",
+ " ],\n",
+ " )\n",
"\n",
" for adjustment in [\"adjusted\", \"unadjusted\"]:\n",
" s_fuels = list(factors[adjustment][singularity_ba].keys())\n",
- " s_factors = [factors[adjustment][singularity_ba][f]['value'] for f in s_fuels]\n",
- " # Add default factors for missing fuel types \n",
+ " s_factors = [factors[adjustment][singularity_ba][f][\"value\"] for f in s_fuels]\n",
+ " # Add default factors for missing fuel types\n",
" for f in default_factors[adjustment].keys():\n",
- " if f not in s_fuels: \n",
+ " if f not in s_fuels:\n",
" s_fuels.append(f)\n",
" s_factors.append(default_factors[adjustment][f])\n",
" fuels = [fuel_categories[f] for f in s_fuels]\n",
" generation_labels = [f\"EBA.{ba}-ALL.NG.{f}.H\" for f in fuels]\n",
"\n",
- " out.loc[:,f\"{adjustment}_carbon\"] = eia930[generation_labels].mul(s_factors, axis='columns').sum(axis='columns')\n",
- " out.loc[:,f\"{adjustment}_rate\"] = out.loc[:,f\"{adjustment}_carbon\"] / eia930.loc[:,f\"EBA.{ba}-ALL.NG.H\"]\n",
+ " out.loc[:, f\"{adjustment}_carbon\"] = (\n",
+ " eia930[generation_labels].mul(s_factors, axis=\"columns\").sum(axis=\"columns\")\n",
+ " )\n",
+ " out.loc[:, f\"{adjustment}_rate\"] = (\n",
+ " out.loc[:, f\"{adjustment}_carbon\"] / eia930.loc[:, f\"EBA.{ba}-ALL.NG.H\"]\n",
+ " )\n",
"\n",
- " os.makedirs(f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/\", exist_ok=True)\n",
- " out.to_csv(f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba}.csv\")\n"
+ " os.makedirs(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/\",\n",
+ " exist_ok=True,\n",
+ " )\n",
+ " out.to_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba}.csv\"\n",
+ " )"
]
},
{
@@ -266,8 +308,12 @@
"metadata": {},
"outputs": [],
"source": [
- "gen_path = f\"{filepaths.data_folder()}/results/{year}/power_sector_data/hourly/us_units/\"\n",
- "consumed_path = f\"{filepaths.data_folder()}/results/{year}/carbon_accounting/hourly/us_units/\""
+ "gen_path = (\n",
+ " f\"{filepaths.data_folder()}/results/{year}/power_sector_data/hourly/us_units/\"\n",
+ ")\n",
+ "consumed_path = (\n",
+ " f\"{filepaths.data_folder()}/results/{year}/carbon_accounting/hourly/us_units/\"\n",
+ ")"
]
},
{
@@ -290,39 +336,60 @@
"med_rate = {}\n",
"cors = {}\n",
"max_difs = {}\n",
- "for ba in os.listdir(f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/\"):\n",
- " if ba == \".DS_Store\": # just some os stuff\n",
- " continue \n",
+ "for ba in os.listdir(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/\"\n",
+ "):\n",
+ " if ba == \".DS_Store\": # just some os stuff\n",
+ " continue\n",
" ba = ba.replace(\".csv\", \"\")\n",
- " singularity_dat = pd.read_csv(f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba}.csv\", index_col=0, parse_dates=True)\n",
+ " singularity_dat = pd.read_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba}.csv\",\n",
+ " index_col=0,\n",
+ " parse_dates=True,\n",
+ " )\n",
" # hourly_consumed = pd.read_csv(consumed_path+ba+\".csv\",\n",
- " # usecols=[\"datetime_utc\", \"consumed_co2_rate_lb_per_mwh_for_electricity\", \"consumed_co2_rate_lb_per_mwh_adjusted\"], \n",
+ " # usecols=[\"datetime_utc\", \"consumed_co2_rate_lb_per_mwh_for_electricity\", \"consumed_co2_rate_lb_per_mwh_adjusted\"],\n",
" # index_col=\"datetime_utc\", parse_dates=True)\n",
- " hourly_generated = pd.read_csv(gen_path+ba+\".csv\", \n",
- " usecols=[\"datetime_utc\", \"generated_co2_rate_lb_per_mwh_for_electricity\", \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\", \"co2_mass_lb\", \"fuel_category\"], \n",
- " index_col=\"datetime_utc\", parse_dates=True)\n",
- " hourly_generated = hourly_generated.loc[hourly_generated.fuel_category==\"total\"]\n",
+ " hourly_generated = pd.read_csv(\n",
+ " gen_path + ba + \".csv\",\n",
+ " usecols=[\n",
+ " \"datetime_utc\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"fuel_category\",\n",
+ " ],\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ " )\n",
+ " hourly_generated = hourly_generated.loc[hourly_generated.fuel_category == \"total\"]\n",
" hourly_generated = hourly_generated.sort_index()\n",
- " all_dat = pd.concat([singularity_dat, hourly_generated], axis='columns')\n",
+ " all_dat = pd.concat([singularity_dat, hourly_generated], axis=\"columns\")\n",
"\n",
" dat_key = \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"\n",
"\n",
" # Patch fix for PJM, see https://github.com/singularity-energy/open-grid-emissions/issues/230\n",
- " if ba==\"PJM\":\n",
+ " if ba == \"PJM\":\n",
" all_dat.loc[all_dat[dat_key] < 100, dat_key] = np.nan\n",
" all_dat = all_dat[\"2020-02-01T00:00\":]\n",
"\n",
- " # Patch fix for FPL real-time issue not caught by rolling filter \n",
- " if ba==\"FPL\":\n",
+ " # Patch fix for FPL real-time issue not caught by rolling filter\n",
+ " if ba == \"FPL\":\n",
" all_dat.loc[all_dat[\"adjusted_rate\"] > 5000, \"adjusted_rate\"] = np.nan\n",
"\n",
- "\n",
" all_dat = all_dat.sort_index()\n",
- " cors[ba] = all_dat[[dat_key, \"adjusted_rate\"]].corr().to_numpy()[0,1]\n",
- " percent_difs[ba] = ((all_dat[\"adjusted_rate\"] - all_dat[dat_key])/all_dat[dat_key]).median()\n",
- " max_difs[ba] = ((all_dat[\"adjusted_rate\"] - all_dat[dat_key])/all_dat[dat_key]).abs().replace(1.0, np.nan).max()\n",
- " abs_difs[ba] = ((all_dat[\"adjusted_rate\"] - all_dat[dat_key])).median()\n",
- " med_rate[ba] = all_dat[\"adjusted_rate\"].median()\n"
+ " cors[ba] = all_dat[[dat_key, \"adjusted_rate\"]].corr().to_numpy()[0, 1]\n",
+ " percent_difs[ba] = (\n",
+ " (all_dat[\"adjusted_rate\"] - all_dat[dat_key]) / all_dat[dat_key]\n",
+ " ).median()\n",
+ " max_difs[ba] = (\n",
+ " ((all_dat[\"adjusted_rate\"] - all_dat[dat_key]) / all_dat[dat_key])\n",
+ " .abs()\n",
+ " .replace(1.0, np.nan)\n",
+ " .max()\n",
+ " )\n",
+ " abs_difs[ba] = (all_dat[\"adjusted_rate\"] - all_dat[dat_key]).median()\n",
+ " med_rate[ba] = all_dat[\"adjusted_rate\"].median()"
]
},
{
@@ -331,35 +398,48 @@
"metadata": {},
"outputs": [],
"source": [
- "out = pd.DataFrame(data={\n",
- " \"Median rate difference\":abs_difs,\n",
- " \"Difference as percent of OGE\":percent_difs,\n",
- " \"Correlation\":cors, \n",
- " \"Annual BA generation\":annual_gen,\n",
- " \"Median rate\":med_rate,\n",
- " })\n",
+ "out = pd.DataFrame(\n",
+ " data={\n",
+ " \"Median rate difference\": abs_difs,\n",
+ " \"Difference as percent of OGE\": percent_difs,\n",
+ " \"Correlation\": cors,\n",
+ " \"Annual BA generation\": annual_gen,\n",
+ " \"Median rate\": med_rate,\n",
+ " }\n",
+ ")\n",
"out = out.sort_values(\"Annual BA generation\", ascending=False)\n",
"\n",
- "# Exclude BAs for which we couldn't calculate a real-time rate \n",
- "todrop = [b for b in out.index if (b not in factors[\"adjusted\"].keys()) and (\"EIA.\"+b not in factors[\"adjusted\"].keys())]\n",
- "print(f\"dropping {todrop} because they aren't included in Singularity's emission rate API\")\n",
+ "# Exclude BAs for which we couldn't calculate a real-time rate\n",
+ "todrop = [\n",
+ " b\n",
+ " for b in out.index\n",
+ " if (b not in factors[\"adjusted\"].keys())\n",
+ " and (\"EIA.\" + b not in factors[\"adjusted\"].keys())\n",
+ "]\n",
+ "print(\n",
+ " f\"dropping {todrop} because they aren't included in Singularity's emission rate API\"\n",
+ ")\n",
"out = out.drop(labels=todrop)\n",
"# exclude BAs for which rate is always zero (Hydro-only BAs)\n",
"zero_rates = []\n",
- "for ba in out.index: \n",
- " if (out.loc[ba, \"Median rate\"] == 0) and (out.loc[ba, \"Median rate difference\"] == 0):\n",
+ "for ba in out.index:\n",
+ " if (out.loc[ba, \"Median rate\"] == 0) and (\n",
+ " out.loc[ba, \"Median rate difference\"] == 0\n",
+ " ):\n",
" zero_rates.append(ba)\n",
"print(f\"Note {zero_rates} have zero rates in OGE data\")\n",
- "#out = out.drop(labels=todrop)\n",
+ "# out = out.drop(labels=todrop)\n",
"# exclude BAs with zero net gen according to our data\n",
"zero_gen = []\n",
- "for ba in out.index: \n",
- " if (out.loc[ba, \"Annual BA generation\"] == 0):\n",
+ "for ba in out.index:\n",
+ " if out.loc[ba, \"Annual BA generation\"] == 0:\n",
" zero_gen.append(ba)\n",
"print(f\"Dropping {zero_gen} because they have zero generation in OGE data\")\n",
"out = out.drop(labels=zero_gen)\n",
"\n",
- "out.to_csv(f\"{filepaths.data_folder()}/outputs/{year}/validation_metrics/us_units/compare_real_time_rates.csv\")"
+ "out.to_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation_metrics/us_units/compare_real_time_rates.csv\"\n",
+ ")"
]
},
{
@@ -377,11 +457,15 @@
"metadata": {},
"outputs": [],
"source": [
- "out_tbl = out.copy()#.round(2)\n",
- "out_tbl[\"Annual BA generation\"] = out_tbl[\"Annual BA generation\"]/1000000 # convert to millions\n",
- "out_tbl[\"Difference as percent of OGE\"] = out_tbl[\"Difference as percent of OGE\"]*100 # convert to %\n",
+ "out_tbl = out.copy() # .round(2)\n",
+ "out_tbl[\"Annual BA generation\"] = (\n",
+ " out_tbl[\"Annual BA generation\"] / 1000000\n",
+ ") # convert to millions\n",
+ "out_tbl[\"Difference as percent of OGE\"] = (\n",
+ " out_tbl[\"Difference as percent of OGE\"] * 100\n",
+ ") # convert to %\n",
"out_tbl = out_tbl.round(2)\n",
- "for line in out_tbl.to_markdown().split(\"/n\"): \n",
+ "for line in out_tbl.to_markdown().split(\"/n\"):\n",
" print(line)"
]
},
@@ -411,47 +495,80 @@
"ba_of_interest = \"BPAT\"\n",
"\n",
"\n",
- "\n",
- "real_time = pd.read_csv(f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba_of_interest}.csv\", index_col=0, parse_dates=True)\n",
+ "real_time = pd.read_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba_of_interest}.csv\",\n",
+ " index_col=0,\n",
+ " parse_dates=True,\n",
+ ")\n",
"real_time = real_time[\"2020-01-01T00:00\":]\n",
"if ba_of_interest == \"NYIS\":\n",
- " # NYIS has a hole in the EIA data that's not there in ISO data: fill it \n",
- " nyis_hole = pd.Series(data=[313, 287.79, 262.215], index=[\"2020-03-30T01:00+00\", \"2020-03-30T02:00+00\", \"2020-03-30T03:00+00\"])\n",
+ " # NYIS has a hole in the EIA data that's not there in ISO data: fill it\n",
+ " nyis_hole = pd.Series(\n",
+ " data=[313, 287.79, 262.215],\n",
+ " index=[\"2020-03-30T01:00+00\", \"2020-03-30T02:00+00\", \"2020-03-30T03:00+00\"],\n",
+ " )\n",
" real_time.loc[nyis_hole.index, \"adjusted_rate\"] = nyis_hole\n",
"\n",
- "hourly_consumed = pd.read_csv(consumed_path+ba_of_interest+\".csv\",\n",
- " usecols=[\"datetime_utc\", \"consumed_co2_rate_lb_per_mwh_for_electricity\", \"consumed_co2_rate_lb_per_mwh_for_electricity_adjusted\"], \n",
- " index_col=\"datetime_utc\", parse_dates=True)\n",
- "hourly_generated = pd.read_csv(gen_path+ba_of_interest+\".csv\", \n",
- " usecols=[\"datetime_utc\", \"generated_co2_rate_lb_per_mwh_for_electricity\", \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\", \"co2_mass_lb\", \"fuel_category\"], \n",
- " index_col=\"datetime_utc\", parse_dates=True)\n",
+ "hourly_consumed = pd.read_csv(\n",
+ " consumed_path + ba_of_interest + \".csv\",\n",
+ " usecols=[\n",
+ " \"datetime_utc\",\n",
+ " \"consumed_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " \"consumed_co2_rate_lb_per_mwh_for_electricity_adjusted\",\n",
+ " ],\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ ")\n",
+ "hourly_generated = pd.read_csv(\n",
+ " gen_path + ba_of_interest + \".csv\",\n",
+ " usecols=[\n",
+ " \"datetime_utc\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"fuel_category\",\n",
+ " ],\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ ")\n",
"\n",
- "all_dat = pd.concat([real_time, hourly_consumed, hourly_generated.loc[hourly_generated.fuel_category==\"total\"]], axis='columns')\n",
+ "all_dat = pd.concat(\n",
+ " [\n",
+ " real_time,\n",
+ " hourly_consumed,\n",
+ " hourly_generated.loc[hourly_generated.fuel_category == \"total\"],\n",
+ " ],\n",
+ " axis=\"columns\",\n",
+ ")\n",
"all_dat = all_dat.sort_index()\n",
"\n",
- "all_dat[\"percent_difs\"] = (all_dat[\"adjusted_rate\"] - all_dat[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"])/all_dat[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"]\n",
+ "all_dat[\"percent_difs\"] = (\n",
+ " all_dat[\"adjusted_rate\"]\n",
+ " - all_dat[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"]\n",
+ ") / all_dat[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"]\n",
"\n",
- "#all_dat = all_dat.loc[parse_dt(\"2020-07-19T00:00+00\"):parse_dt(\"2020-08-06T00:00+00\")]\n",
- "#all_dat = all_dat.loc[parse_dt(\"2020-02-10T00:00+00\"):parse_dt(\"2020-02-28T00:00+00\")]\n",
+ "# all_dat = all_dat.loc[parse_dt(\"2020-07-19T00:00+00\"):parse_dt(\"2020-08-06T00:00+00\")]\n",
+ "# all_dat = all_dat.loc[parse_dt(\"2020-02-10T00:00+00\"):parse_dt(\"2020-02-28T00:00+00\")]\n",
"\n",
- "fig = px.line(all_dat, x=all_dat.index, y=[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\", \"adjusted_rate\"], \n",
+ "fig = px.line(\n",
+ " all_dat,\n",
+ " x=all_dat.index,\n",
+ " y=[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\", \"adjusted_rate\"],\n",
" title=f\"Real time accuracy in {ba_of_interest}\",\n",
- " labels={\n",
- " \"value\":\"CO2 emission rate (lb/mwh)\",\n",
- " \"index\":\"Hour (UTC)\"\n",
- " }, \n",
- " template='plotly_white',\n",
+ " labels={\"value\": \"CO2 emission rate (lb/mwh)\", \"index\": \"Hour (UTC)\"},\n",
+ " template=\"plotly_white\",\n",
")\n",
"\n",
"newnames = {\n",
- " 'generated_co2_rate_lb_per_mwh_for_electricity_adjusted': 'Historical benchmark',\n",
- " 'adjusted_rate': 'Real-time data'}\n",
- "fig.for_each_trace(lambda t: t.update(name = newnames[t.name]))\n",
- "fig.update_layout(legend_title_text='')\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\": \"Historical benchmark\",\n",
+ " \"adjusted_rate\": \"Real-time data\",\n",
+ "}\n",
+ "fig.for_each_trace(lambda t: t.update(name=newnames[t.name]))\n",
+ "fig.update_layout(legend_title_text=\"\")\n",
"fig.show()\n",
"\n",
"os.makedirs(f\"{filepaths.data_folder()}/outputs/viz/\", exist_ok=True)\n",
- "#pio.write_image(fig, f\"{filepaths.data_folder()}/outputs/viz/{ba_of_interest}_aug_sm.jpg\", width=1000*(2/3), height=500*(2/3), scale=3)"
+ "# pio.write_image(fig, f\"{filepaths.data_folder()}/outputs/viz/{ba_of_interest}_aug_sm.jpg\", width=1000*(2/3), height=500*(2/3), scale=3)"
]
},
{
@@ -469,34 +586,60 @@
"metadata": {},
"outputs": [],
"source": [
- "### Plot natural gas emission rate: does this explain larger gap in summer? \n",
+ "### Plot natural gas emission rate: does this explain larger gap in summer?\n",
"\n",
- "hourly_rate = pd.read_csv(gen_path+ba_of_interest+\".csv\", \n",
- " usecols=[\"datetime_utc\", \"generated_co2_rate_lb_per_mwh_for_electricity\", \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\", \"co2_mass_lb\", \"fuel_category\"], \n",
- " index_col=\"datetime_utc\", parse_dates=True)\n",
+ "hourly_rate = pd.read_csv(\n",
+ " gen_path + ba_of_interest + \".csv\",\n",
+ " usecols=[\n",
+ " \"datetime_utc\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"fuel_category\",\n",
+ " ],\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ ")\n",
"hourly_rate = hourly_rate[hourly_rate.fuel_category == \"natural_gas\"]\n",
"\n",
"fig = go.Figure()\n",
- "fig.add_trace(go.Scatter(x=hourly_rate.index, y=hourly_rate[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"], name=\"Hourly emission rate\"))\n",
- "fig.add_trace(go.Scatter(x=[parse_dt(\"2020-01-01T00:00\"), parse_dt(\"2021-01-01T00:00\")], \n",
- " y=[factors[\"adjusted\"][\"EIA.\"+ba_of_interest][\"natural_gas\"][\"value\"], factors[\"adjusted\"][\"EIA.\"+ba_of_interest][\"natural_gas\"][\"value\"]], \n",
- " name=\"eGRID annual emission rate\", mode=\"lines\"\n",
- "))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=hourly_rate.index,\n",
+ " y=hourly_rate[\"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\"],\n",
+ " name=\"Hourly emission rate\",\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=[parse_dt(\"2020-01-01T00:00\"), parse_dt(\"2021-01-01T00:00\")],\n",
+ " y=[\n",
+ " factors[\"adjusted\"][\"EIA.\" + ba_of_interest][\"natural_gas\"][\"value\"],\n",
+ " factors[\"adjusted\"][\"EIA.\" + ba_of_interest][\"natural_gas\"][\"value\"],\n",
+ " ],\n",
+ " name=\"eGRID annual emission rate\",\n",
+ " mode=\"lines\",\n",
+ " )\n",
+ ")\n",
"\n",
"fig.update_xaxes(range=(parse_dt(\"2020-01-01T00:00\"), parse_dt(\"2021-01-01T00:00\")))\n",
- "fig.update_layout(template=\"plotly_white\", title=f\"Natural gas emission rates in {ba_of_interest}O\",\n",
- "legend=dict(\n",
- " yanchor=\"top\",\n",
- " y=0.99,\n",
- " xanchor=\"left\",\n",
- " x=0.01\n",
- "))\n",
+ "fig.update_layout(\n",
+ " template=\"plotly_white\",\n",
+ " title=f\"Natural gas emission rates in {ba_of_interest}O\",\n",
+ " legend=dict(yanchor=\"top\", y=0.99, xanchor=\"left\", x=0.01),\n",
+ ")\n",
"\n",
- "fig.update_yaxes(title_text='Natural gas emission rate
(lb CO2/MWh)')\n",
+ "fig.update_yaxes(title_text=\"Natural gas emission rate
(lb CO2/MWh)\")\n",
"\n",
"fig.show()\n",
"\n",
- "pio.write_image(fig, f\"{filepaths.data_folder()}/outputs/viz/gas_rate_{ba_of_interest}.jpg\", width=1000*(4/5), height=500*(4/5), scale=3)"
+ "pio.write_image(\n",
+ " fig,\n",
+ " f\"{filepaths.data_folder()}/outputs/viz/gas_rate_{ba_of_interest}.jpg\",\n",
+ " width=1000 * (4 / 5),\n",
+ " height=500 * (4 / 5),\n",
+ " scale=3,\n",
+ ")"
]
},
{
@@ -505,21 +648,28 @@
"metadata": {},
"outputs": [],
"source": [
- "oge_generation = pd.read_csv(gen_path+ba_of_interest+\".csv\", \n",
- " usecols=[\"datetime_utc\", \"fuel_category\", \"net_generation_mwh\"], \n",
- " index_col=\"datetime_utc\", parse_dates=True)\n",
- "oge_generation = oge_generation.pivot(columns=\"fuel_category\", values=\"net_generation_mwh\")\n",
+ "oge_generation = pd.read_csv(\n",
+ " gen_path + ba_of_interest + \".csv\",\n",
+ " usecols=[\"datetime_utc\", \"fuel_category\", \"net_generation_mwh\"],\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ ")\n",
+ "oge_generation = oge_generation.pivot(\n",
+ " columns=\"fuel_category\", values=\"net_generation_mwh\"\n",
+ ")\n",
"\n",
- "# plot real-time and OGE per-fuel generation in FPC to identify source of neg correlation \n",
- "eiacols = [f'EBA.{ba_of_interest}-ALL.NG.COL.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.NG.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.NUC.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.OIL.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.OTH.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.SUN.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.UNK.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.WAT.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.WND.H']\n",
+ "# plot real-time and OGE per-fuel generation in FPC to identify source of neg correlation\n",
+ "eiacols = [\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.COL.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.NG.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.NUC.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.OIL.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.OTH.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.SUN.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.UNK.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.WAT.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.WND.H\",\n",
+ "]\n",
"\n",
"toplot = pd.concat([eia930[eiacols], oge_generation])"
]
@@ -539,29 +689,29 @@
"metadata": {},
"outputs": [],
"source": [
- "# plot real-time and OGE per-fuel generation in FPC to identify source of neg correlation \n",
+ "# plot real-time and OGE per-fuel generation in FPC to identify source of neg correlation\n",
"plotcols = [\n",
- " #f'EBA.{ba_of_interest}-ALL.NG.COL.H',\n",
- " #f'EBA.{ba_of_interest}-ALL.NG.NG.H',\n",
- " #f'EBA.{ba_of_interest}-ALL.NG.NUC.H',\n",
- " #f'EBA.{ba_of_interest}-ALL.NG.OIL.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.OTH.H',\n",
- " #f'EBA.{ba_of_interest}-ALL.NG.SUN.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.UNK.H',\n",
- " f'EBA.{ba_of_interest}-ALL.NG.WAT.H',\n",
- " #f'EBA.{ba_of_interest}-ALL.NG.WND.H',\n",
- " #\"biomass\",\n",
- " #\"natural_gas\",\n",
- " #\"petroleum\",\n",
- " #\"solar\",\n",
- " #\"total\",\n",
- " #\"waste\",\n",
- " #\"geothermal\", \n",
- " \"hydro\",\n",
- " #\"wind\",\n",
- " ]\n",
+ " # f'EBA.{ba_of_interest}-ALL.NG.COL.H',\n",
+ " # f'EBA.{ba_of_interest}-ALL.NG.NG.H',\n",
+ " # f'EBA.{ba_of_interest}-ALL.NG.NUC.H',\n",
+ " # f'EBA.{ba_of_interest}-ALL.NG.OIL.H',\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.OTH.H\",\n",
+ " # f'EBA.{ba_of_interest}-ALL.NG.SUN.H',\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.UNK.H\",\n",
+ " f\"EBA.{ba_of_interest}-ALL.NG.WAT.H\",\n",
+ " # f'EBA.{ba_of_interest}-ALL.NG.WND.H',\n",
+ " # \"biomass\",\n",
+ " # \"natural_gas\",\n",
+ " # \"petroleum\",\n",
+ " # \"solar\",\n",
+ " # \"total\",\n",
+ " # \"waste\",\n",
+ " # \"geothermal\",\n",
+ " \"hydro\",\n",
+ " # \"wind\",\n",
+ "]\n",
"\n",
- "px.line(toplot[plotcols])\n"
+ "px.line(toplot[plotcols])"
]
},
{
@@ -570,7 +720,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# What plants "
+ "# What plants"
]
},
{
@@ -579,7 +729,11 @@
"metadata": {},
"outputs": [],
"source": [
- "px.histogram(all_dat, x=\"percent_difs\", title=\"NYIS hourly difference between benchmark and real-time
as percent of benchmark \")"
+ "px.histogram(\n",
+ " all_dat,\n",
+ " x=\"percent_difs\",\n",
+ " title=\"NYIS hourly difference between benchmark and real-time
as percent of benchmark \",\n",
+ ")"
]
},
{
@@ -605,7 +759,7 @@
"metadata": {},
"outputs": [],
"source": [
- "(55539223793.10689 - 57691924000)/57691924000"
+ "(55539223793.10689 - 57691924000) / 57691924000"
]
},
{
@@ -633,7 +787,13 @@
"metadata": {},
"outputs": [],
"source": [
- "px.scatter(out, x=\"Difference as percent of OGE\", y=\"Correlation\", size=\"Annual BA generation\", template=\"plotly_white\")#, text=out.index)"
+ "px.scatter(\n",
+ " out,\n",
+ " x=\"Difference as percent of OGE\",\n",
+ " y=\"Correlation\",\n",
+ " size=\"Annual BA generation\",\n",
+ " template=\"plotly_white\",\n",
+ ") # , text=out.index)"
]
},
{
@@ -642,17 +802,38 @@
"metadata": {},
"outputs": [],
"source": [
- "#fig = px.scatter(out, x=\"Annual BA generation\", y=\"Correlation\", template=\"plotly_white\")#, text=out.index)\n",
+ "# fig = px.scatter(out, x=\"Annual BA generation\", y=\"Correlation\", template=\"plotly_white\")#, text=out.index)\n",
"fig = go.Figure()\n",
"\n",
- "fig.add_trace(go.Scatter(y=[-3000000,805000000], x=[1,1], line={\"width\":2, \"color\":\"lightslategrey\"}, mode=\"lines\"))\n",
- "fig.add_trace( go.Scatter(y=out[\"Annual BA generation\"], x=out[\"Correlation\"], text=out.index, mode=\"markers\", marker={\"color\":\"rgb(17, 119, 51)\"})) #, color=\"Median rate\")#, text=out.index)\n",
- "fig.update_yaxes(range=(-3000000,805000000))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " y=[-3000000, 805000000],\n",
+ " x=[1, 1],\n",
+ " line={\"width\": 2, \"color\": \"lightslategrey\"},\n",
+ " mode=\"lines\",\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " y=out[\"Annual BA generation\"],\n",
+ " x=out[\"Correlation\"],\n",
+ " text=out.index,\n",
+ " mode=\"markers\",\n",
+ " marker={\"color\": \"rgb(17, 119, 51)\"},\n",
+ " )\n",
+ ") # , color=\"Median rate\")#, text=out.index)\n",
+ "fig.update_yaxes(range=(-3000000, 805000000))\n",
"fig.update_layout(template=\"plotly_white\", showlegend=False)\n",
"\n",
- "fig.update_xaxes(dtick=.250)\n",
+ "fig.update_xaxes(dtick=0.250)\n",
"fig.show()\n",
- "pio.write_image(fig, f\"{filepaths.data_folder()}/outputs/viz/cor_ba_gen.jpg\", width=800*(1/2), height=900*(1/2), scale=4)"
+ "pio.write_image(\n",
+ " fig,\n",
+ " f\"{filepaths.data_folder()}/outputs/viz/cor_ba_gen.jpg\",\n",
+ " width=800 * (1 / 2),\n",
+ " height=900 * (1 / 2),\n",
+ " scale=4,\n",
+ ")"
]
},
{
@@ -661,7 +842,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#px.scatter(out, x=\"Annual BA generation\", y=\"Difference as percent of OGE\")#, text=out.index)"
+ "# px.scatter(out, x=\"Annual BA generation\", y=\"Difference as percent of OGE\")#, text=out.index)"
]
},
{
@@ -672,13 +853,34 @@
"source": [
"fig = go.Figure()\n",
"\n",
- "fig.add_trace(go.Scatter(y=[-3000000,805000000], x=[0,0], line={\"width\":2, \"color\":\"lightslategrey\"}, mode=\"lines\"))\n",
- "fig.add_trace( go.Scatter(y=out[\"Annual BA generation\"], x=out[\"Median rate difference\"], text=out.index, mode=\"markers\", marker={\"color\":\"rgb(17, 119, 51)\"})) #, color=\"Median rate\")#, text=out.index)\n",
- "fig.update_yaxes(range=(-3000000,805000000))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " y=[-3000000, 805000000],\n",
+ " x=[0, 0],\n",
+ " line={\"width\": 2, \"color\": \"lightslategrey\"},\n",
+ " mode=\"lines\",\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " y=out[\"Annual BA generation\"],\n",
+ " x=out[\"Median rate difference\"],\n",
+ " text=out.index,\n",
+ " mode=\"markers\",\n",
+ " marker={\"color\": \"rgb(17, 119, 51)\"},\n",
+ " )\n",
+ ") # , color=\"Median rate\")#, text=out.index)\n",
+ "fig.update_yaxes(range=(-3000000, 805000000))\n",
"fig.update_layout(template=\"plotly_white\", showlegend=False)\n",
"fig.update_xaxes(dtick=500)\n",
"fig.show()\n",
- "pio.write_image(fig, f\"{filepaths.data_folder()}/outputs/viz/dif_ba_gen.jpg\", width=800*(1/2), height=900*(1/2), scale=4)"
+ "pio.write_image(\n",
+ " fig,\n",
+ " f\"{filepaths.data_folder()}/outputs/viz/dif_ba_gen.jpg\",\n",
+ " width=800 * (1 / 2),\n",
+ " height=900 * (1 / 2),\n",
+ " scale=4,\n",
+ ")"
]
},
{
@@ -703,7 +905,7 @@
"metadata": {},
"outputs": [],
"source": [
- "dat = "
+ "# dat ="
]
},
{
@@ -719,9 +921,9 @@
"metadata": {},
"outputs": [],
"source": [
- "good = len(out[out[\"Difference as percent of OGE\"].abs() <= .1])\n",
- "bad = len(out[out[\"Difference as percent of OGE\"].abs() > .1])\n",
- "print(good/(bad+good))"
+ "good = len(out[out[\"Difference as percent of OGE\"].abs() <= 0.1])\n",
+ "bad = len(out[out[\"Difference as percent of OGE\"].abs() > 0.1])\n",
+ "print(good / (bad + good))"
]
},
{
@@ -730,9 +932,9 @@
"metadata": {},
"outputs": [],
"source": [
- "for col in out.columns: \n",
+ "for col in out.columns:\n",
" out = out.replace(np.inf, np.nan)\n",
- " out = out.replace(-1*np.inf, np.nan)\n",
+ " out = out.replace(-1 * np.inf, np.nan)\n",
" non_nan_out = out.dropna(subset=col)\n",
" a = np.average(non_nan_out[col].abs(), weights=non_nan_out[\"Annual BA generation\"])\n",
" print(f\"{col} = {a}\")"
@@ -751,32 +953,64 @@
"metadata": {},
"outputs": [],
"source": [
- "# Plot and save all BAs \n",
- "for ba_of_interest in os.listdir(f\"{filepaths.data_folder()}/outputs/2020/validation/real_time_rate/\"):\n",
+ "# Plot and save all BAs\n",
+ "for ba_of_interest in os.listdir(\n",
+ " f\"{filepaths.data_folder()}/outputs/2020/validation/real_time_rate/\"\n",
+ "):\n",
" ba_of_interest = ba_of_interest.replace(\".csv\", \"\")\n",
" if \".DS_\" in ba_of_interest:\n",
" continue\n",
- " \n",
- " real_time = pd.read_csv(f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba_of_interest}.csv\", index_col=0, parse_dates=True)\n",
+ "\n",
+ " real_time = pd.read_csv(\n",
+ " f\"{filepaths.data_folder()}/outputs/{year}/validation/real_time_rate/{ba_of_interest}.csv\",\n",
+ " index_col=0,\n",
+ " parse_dates=True,\n",
+ " )\n",
" real_time = real_time[\"2020-01-01T00:00\":]\n",
"\n",
- " hourly_generated = pd.read_csv(gen_path+ba_of_interest+\".csv\", \n",
- " usecols=[\"datetime_utc\", \"generated_co2_rate_lb_per_mwh_for_electricity\", \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\", \"co2_mass_lb\", \"fuel_category\"], \n",
- " index_col=\"datetime_utc\", parse_dates=True)\n",
+ " hourly_generated = pd.read_csv(\n",
+ " gen_path + ba_of_interest + \".csv\",\n",
+ " usecols=[\n",
+ " \"datetime_utc\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity_adjusted\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"fuel_category\",\n",
+ " ],\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ " )\n",
"\n",
- " all_dat = pd.concat([real_time, hourly_consumed, hourly_generated.loc[hourly_generated.fuel_category==\"total\"]], axis='columns')\n",
+ " all_dat = pd.concat(\n",
+ " [\n",
+ " real_time,\n",
+ " hourly_consumed,\n",
+ " hourly_generated.loc[hourly_generated.fuel_category == \"total\"],\n",
+ " ],\n",
+ " axis=\"columns\",\n",
+ " )\n",
" all_dat = all_dat.sort_index()\n",
"\n",
- " fig = px.line(all_dat, x=all_dat.index, y=[\"generated_co2_rate_lb_per_mwh_for_electricity\", \"adjusted_rate\"], \n",
+ " fig = px.line(\n",
+ " all_dat,\n",
+ " x=all_dat.index,\n",
+ " y=[\"generated_co2_rate_lb_per_mwh_for_electricity\", \"adjusted_rate\"],\n",
" title=f\"{ba_of_interest} rate comparison\",\n",
- " labels={\n",
- " \"value\":\"Adjsuted CO2 emission rate (lb/mwh)\",\n",
- " \"index\":\"Hour\"\n",
- " })\n",
+ " labels={\"value\": \"Adjsuted CO2 emission rate (lb/mwh)\", \"index\": \"Hour\"},\n",
+ " )\n",
"\n",
- " newnames = {'generated_co2_rate_lb_per_mwh_for_electricity': 'Our data', 'adjusted_rate': 'Real-time data'}\n",
- " fig.for_each_trace(lambda t: t.update(name = newnames[t.name]))\n",
- " pio.write_image(fig, f\"{filepaths.data_folder()}/outputs/viz/{ba_of_interest}.jpg\", width=1000, height=400, scale=3)"
+ " newnames = {\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\": \"Our data\",\n",
+ " \"adjusted_rate\": \"Real-time data\",\n",
+ " }\n",
+ " fig.for_each_trace(lambda t: t.update(name=newnames[t.name]))\n",
+ " pio.write_image(\n",
+ " fig,\n",
+ " f\"{filepaths.data_folder()}/outputs/viz/{ba_of_interest}.jpg\",\n",
+ " width=1000,\n",
+ " height=400,\n",
+ " scale=3,\n",
+ " )"
]
},
{
@@ -789,7 +1023,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "oge_update",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -803,9 +1037,8 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.8 | packaged by conda-forge | (main, Nov 24 2022, 14:07:00) [MSC v.1916 64 bit (AMD64)]"
+ "version": "3.10.4"
},
- "orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "a6b598b31fc646bdc0acd5c2004810e407b47ff3b1550e1d461e8498c70ba381"
@@ -813,5 +1046,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/notebooks/validation/validate_data_cleaning.ipynb b/notebooks/validation/validate_data_cleaning.ipynb
index 149f5f00..ebdd650e 100644
--- a/notebooks/validation/validate_data_cleaning.ipynb
+++ b/notebooks/validation/validate_data_cleaning.ipynb
@@ -12,9 +12,10 @@
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
- "# Tell python where to look for modules. \n",
+ "# Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/\")\n",
"\n",
"# import local modules\n",
"import src.validation as validation\n",
@@ -22,7 +23,7 @@
"from src.column_checks import get_dtypes\n",
"\n",
"year = 2020\n",
- "path_prefix = ''\n",
+ "path_prefix = \"\"\n",
"path_prefix = f\"{path_prefix}{year}\""
]
},
@@ -44,7 +45,11 @@
"metadata": {},
"outputs": [],
"source": [
- "eia923_allocated = pd.read_csv(f'../data/outputs/{path_prefix}/eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])"
+ "eia923_allocated = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/eia923_allocated_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")"
]
},
{
@@ -55,10 +60,21 @@
"source": [
"# perform checks on allocated data\n",
"# fuel consumption and co2 emissions should be positive\n",
- "negative_test = validation.test_for_negative_values(eia923_allocated, ['fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_for_electricity','co2_mass_lb_adjusted'])\n",
+ "negative_test = validation.test_for_negative_values(\n",
+ " eia923_allocated,\n",
+ " [\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# if net generation is positive, fuel consumption should be non zero\n",
- "missing_fuel_test = validation.test_for_missing_fuel(eia923_allocated, 'net_generation_mwh')\n",
+ "missing_fuel_test = validation.test_for_missing_fuel(\n",
+ " eia923_allocated, \"net_generation_mwh\"\n",
+ ")\n",
"\n",
"# fuel consumed for electricity should be less than fuel consumed\n",
"chp_allocation_test = validation.test_chp_allocation(eia923_allocated)\n",
@@ -67,16 +83,38 @@
"missing_co2_test = validation.test_for_missing_co2(eia923_allocated)\n",
"\n",
"# check for generators with no data\n",
- "missing_data_test = validation.test_for_missing_data(eia923_allocated, ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_for_electricity','co2_mass_lb_adjusted'])\n",
+ "missing_data_test = validation.test_for_missing_data(\n",
+ " eia923_allocated,\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# check for generators with all data = 0\n",
- "zero_data_test = validation.test_for_zero_data(eia923_allocated, ['net_generation_mwh','fuel_consumed_mmbtu','fuel_consumed_for_electricity_mmbtu','co2_mass_lb','co2_mass_lb_for_electricity','co2_mass_lb_adjusted'])\n",
+ "zero_data_test = validation.test_for_zero_data(\n",
+ " eia923_allocated,\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " ],\n",
+ ")\n",
"\n",
"# check for missing energy source code\n",
"missing_esc_test = validation.test_for_missing_energy_source_code(eia923_allocated)\n",
"\n",
"# check for missing and incorrect prime movers\n",
- "incorrect_pm_test, missing_pm_test = validation.test_for_missing_incorrect_prime_movers(eia923_allocated, year)\n",
+ "incorrect_pm_test, missing_pm_test = validation.test_for_missing_incorrect_prime_movers(\n",
+ " eia923_allocated, year\n",
+ ")\n",
"\n",
"# check for missing subplant ids\n",
"eia_missing_subplant_test = validation.test_for_missing_subplant_id(eia923_allocated)"
@@ -109,7 +147,26 @@
"source": [
"# what percent of emissions is reported in CEMS vs EIA\n",
"# NOTE: This does not include emissions only reported by CEMS, so the % may be higher\n",
- "(eia923_allocated.groupby('hourly_data_source')[[\"net_generation_mwh\",\"fuel_consumed_mmbtu\", 'co2_mass_lb',\"co2_mass_lb_for_electricity\"]].sum() / eia923_allocated.groupby('hourly_data_source')[[\"net_generation_mwh\",\"fuel_consumed_mmbtu\", 'co2_mass_lb',\"co2_mass_lb_for_electricity\"]].sum().sum(axis=0)).round(3)"
+ "(\n",
+ " eia923_allocated.groupby(\"hourly_data_source\")[\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " ]\n",
+ " ].sum()\n",
+ " / eia923_allocated.groupby(\"hourly_data_source\")[\n",
+ " [\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " ]\n",
+ " ]\n",
+ " .sum()\n",
+ " .sum(axis=0)\n",
+ ").round(3)"
]
},
{
@@ -125,7 +182,11 @@
"metadata": {},
"outputs": [],
"source": [
- "cems = pd.read_csv(f'../data/outputs/{path_prefix}/cems_subplant_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])"
+ "cems = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/cems_subplant_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")"
]
},
{
@@ -138,7 +199,7 @@
"cems_negative_test = validation.test_for_negative_values(cems)\n",
"\n",
"# if net generation is positive, fuel consumption should be non zero\n",
- "cems_missing_fuel_test = validation.test_for_missing_fuel(cems,'gross_generation_mwh')\n",
+ "cems_missing_fuel_test = validation.test_for_missing_fuel(cems, \"gross_generation_mwh\")\n",
"\n",
"# fuel consumed for electricity should be less than fuel consumed\n",
"cems_chp_allocation_test = validation.test_chp_allocation(cems)\n",
@@ -147,13 +208,13 @@
"cems_missing_co2_test = validation.test_for_missing_co2(cems)\n",
"\n",
"# check for missing energy source code\n",
- "#cems_missing_esc_test = validation.test_for_missing_energy_source_code(cems)\n",
+ "# cems_missing_esc_test = validation.test_for_missing_energy_source_code(cems)\n",
"\n",
"# test to make sure that there is a complete subplant mapping\n",
"cems_missing_subplant_test = validation.test_for_missing_subplant_id(cems)\n",
"\n",
"# test to see if there are any net generation values greater than gross generation\n",
- "gtn_test = validation.test_gtn_results(cems)\n"
+ "gtn_test = validation.test_gtn_results(cems)"
]
},
{
diff --git a/notebooks/validation/validate_hourly_profiles.ipynb b/notebooks/validation/validate_hourly_profiles.ipynb
index 1564a00a..ec589bc7 100644
--- a/notebooks/validation/validate_hourly_profiles.ipynb
+++ b/notebooks/validation/validate_hourly_profiles.ipynb
@@ -10,7 +10,7 @@
"import sys\n",
"import plotly.express as px\n",
"\n",
- "sys.path.append('../../../open-grid-emissions/')\n",
+ "sys.path.append(\"../../../open-grid-emissions/\")\n",
"%reload_ext autoreload\n",
"%autoreload 2\n",
"from src.column_checks import get_dtypes, apply_dtypes\n",
@@ -23,7 +23,7 @@
"\n",
"# load data from csv\n",
"year = 2020\n",
- "path_prefix = ''\n",
+ "path_prefix = \"\"\n",
"\n",
"path_prefix = f\"{path_prefix}{year}\""
]
@@ -53,9 +53,27 @@
"metadata": {},
"outputs": [],
"source": [
- "eia930_data_raw = eia930.load_chalendar_for_pipeline(f\"../data/outputs/{path_prefix}/eia930/eia930_raw.csv\", year=year).pipe(eia930.remove_imputed_ones).pipe(eia930.remove_months_with_zero_data)\n",
- "eia930_data_roll = eia930.load_chalendar_for_pipeline(f\"../data/outputs/{path_prefix}/eia930/eia930_rolling.csv\", year=year).pipe(eia930.remove_imputed_ones).pipe(eia930.remove_months_with_zero_data)\n",
- "eia930_data_cleaned = eia930.load_chalendar_for_pipeline(f\"../data/outputs/{path_prefix}/eia930/eia930_elec.csv\", year=year).pipe(eia930.remove_imputed_ones).pipe(eia930.remove_months_with_zero_data)"
+ "eia930_data_raw = (\n",
+ " eia930.load_chalendar_for_pipeline(\n",
+ " f\"../data/outputs/{path_prefix}/eia930/eia930_raw.csv\", year=year\n",
+ " )\n",
+ " .pipe(eia930.remove_imputed_ones)\n",
+ " .pipe(eia930.remove_months_with_zero_data)\n",
+ ")\n",
+ "eia930_data_roll = (\n",
+ " eia930.load_chalendar_for_pipeline(\n",
+ " f\"../data/outputs/{path_prefix}/eia930/eia930_rolling.csv\", year=year\n",
+ " )\n",
+ " .pipe(eia930.remove_imputed_ones)\n",
+ " .pipe(eia930.remove_months_with_zero_data)\n",
+ ")\n",
+ "eia930_data_cleaned = (\n",
+ " eia930.load_chalendar_for_pipeline(\n",
+ " f\"../data/outputs/{path_prefix}/eia930/eia930_elec.csv\", year=year\n",
+ " )\n",
+ " .pipe(eia930.remove_imputed_ones)\n",
+ " .pipe(eia930.remove_months_with_zero_data)\n",
+ ")"
]
},
{
@@ -64,11 +82,27 @@
"metadata": {},
"outputs": [],
"source": [
- "cems = pd.read_csv(f'../data/outputs/{path_prefix}/cems_subplant_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "partial_cems_scaled = pd.read_csv(f'../data/outputs/{path_prefix}/partial_cems_scaled_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "eia923_allocated = pd.read_csv(f'../data/outputs/{path_prefix}/eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])\n",
- "plant_attributes = pd.read_csv(f\"../data/outputs/{path_prefix}/plant_static_attributes_{year}.csv\")\n",
- "primary_fuel_table = plant_attributes.drop_duplicates(subset=\"plant_id_eia\")[[\"plant_id_eia\", \"plant_primary_fuel\"]]\n",
+ "cems = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/cems_subplant_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "partial_cems_scaled = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/partial_cems_scaled_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "eia923_allocated = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/eia923_allocated_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")\n",
+ "plant_attributes = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/plant_static_attributes_{year}.csv\"\n",
+ ")\n",
+ "primary_fuel_table = plant_attributes.drop_duplicates(subset=\"plant_id_eia\")[\n",
+ " [\"plant_id_eia\", \"plant_primary_fuel\"]\n",
+ "]\n",
"\n",
"# aggregate cems data to subplant level\n",
"cems = data_cleaning.aggregate_cems_to_subplant(cems)\n",
@@ -80,7 +114,7 @@
"monthly_eia_data_to_shape = eia923_allocated[\n",
" (eia923_allocated[\"hourly_data_source\"] == \"eia\")\n",
" & ~(eia923_allocated[\"fuel_consumed_mmbtu\"].isna())\n",
- "]\n"
+ "]"
]
},
{
@@ -89,7 +123,10 @@
"metadata": {},
"outputs": [],
"source": [
- "data_to_graph = eia930_data_roll[(eia930_data_roll[\"fuel_category_eia930\"] == \"solar\") & (eia930_data_roll[\"report_date\"] == \"2020-07-01\")]\n",
+ "data_to_graph = eia930_data_roll[\n",
+ " (eia930_data_roll[\"fuel_category_eia930\"] == \"solar\")\n",
+ " & (eia930_data_roll[\"report_date\"] == \"2020-07-01\")\n",
+ "]\n",
"\n",
"px.line(data_to_graph, x=\"datetime_local\", y=\"net_generation_mwh_930\", color=\"ba_code\")"
]
@@ -144,7 +181,11 @@
"metadata": {},
"outputs": [],
"source": [
- "hourly_profiles_raw[[\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\").fillna(0).astype(int)"
+ "hourly_profiles_raw[\n",
+ " [\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]\n",
+ "].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(\n",
+ " index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\"\n",
+ ").fillna(0).astype(int)"
]
},
{
@@ -163,7 +204,9 @@
"metadata": {},
"outputs": [],
"source": [
- "national_validation = validation.validate_national_imputation_method(hourly_profiles_raw, year)\n",
+ "national_validation = validation.validate_national_imputation_method(\n",
+ " hourly_profiles_raw, year\n",
+ ")\n",
"national_validation.groupby([\"fuel_category\"]).mean()[\"imputed_profile\"]"
]
},
@@ -180,7 +223,11 @@
"metadata": {},
"outputs": [],
"source": [
- "hourly_profiles_roll[[\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\").fillna(0).astype(int)"
+ "hourly_profiles_roll[\n",
+ " [\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]\n",
+ "].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(\n",
+ " index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\"\n",
+ ").fillna(0).astype(int)"
]
},
{
@@ -199,7 +246,9 @@
"metadata": {},
"outputs": [],
"source": [
- "national_validation = validation.validate_national_imputation_method(hourly_profiles_roll, year)\n",
+ "national_validation = validation.validate_national_imputation_method(\n",
+ " hourly_profiles_roll, year\n",
+ ")\n",
"national_validation.groupby([\"fuel_category\"]).mean()[\"imputed_profile\"]"
]
},
@@ -216,7 +265,11 @@
"metadata": {},
"outputs": [],
"source": [
- "hourly_profiles_cleaned[[\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\").fillna(0).astype(int)"
+ "hourly_profiles_cleaned[\n",
+ " [\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]\n",
+ "].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(\n",
+ " index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\"\n",
+ ").fillna(0).astype(int)"
]
},
{
@@ -225,7 +278,9 @@
"metadata": {},
"outputs": [],
"source": [
- "diba_validation = validation.validate_diba_imputation_method(hourly_profiles_cleaned, year)\n",
+ "diba_validation = validation.validate_diba_imputation_method(\n",
+ " hourly_profiles_cleaned, year\n",
+ ")\n",
"diba_validation.groupby([\"fuel_category\"]).mean()[\"imputed_profile\"]"
]
},
@@ -235,7 +290,9 @@
"metadata": {},
"outputs": [],
"source": [
- "national_validation = validation.validate_national_imputation_method(hourly_profiles_cleaned, year)\n",
+ "national_validation = validation.validate_national_imputation_method(\n",
+ " hourly_profiles_cleaned, year\n",
+ ")\n",
"national_validation.groupby([\"fuel_category\"]).mean()[\"imputed_profile\"]"
]
},
@@ -252,8 +309,12 @@
"metadata": {},
"outputs": [],
"source": [
- "hourly_profiles = pd.read_csv(f\"../data/outputs/{path_prefix}/hourly_profiles_{year}.csv\")\n",
- "shaped_eia923_data = pd.read_csv(f'../data/outputs/{path_prefix}/shaped_eia923_data_{year}.csv', dtype=get_dtypes())"
+ "hourly_profiles = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/hourly_profiles_{year}.csv\"\n",
+ ")\n",
+ "shaped_eia923_data = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}/shaped_eia923_data_{year}.csv\", dtype=get_dtypes()\n",
+ ")"
]
},
{
@@ -271,9 +332,16 @@
"metadata": {},
"outputs": [],
"source": [
- "data_to_graph = hourly_profiles[(hourly_profiles[\"fuel_category\"] == \"natural_gas\") & (hourly_profiles[\"ba_code\"] == \"ERCO\")]\n",
+ "data_to_graph = hourly_profiles[\n",
+ " (hourly_profiles[\"fuel_category\"] == \"natural_gas\")\n",
+ " & (hourly_profiles[\"ba_code\"] == \"ERCO\")\n",
+ "]\n",
"\n",
- "px.line(data_to_graph, x=\"datetime_local\", y=[\"eia930_profile\",\"cems_profile\",\"residual_profile\",\"scaled_residual_profile\"])"
+ "px.line(\n",
+ " data_to_graph,\n",
+ " x=\"datetime_local\",\n",
+ " y=[\"eia930_profile\", \"cems_profile\", \"residual_profile\", \"scaled_residual_profile\"],\n",
+ ")"
]
},
{
@@ -282,7 +350,10 @@
"metadata": {},
"outputs": [],
"source": [
- "data_to_graph = shaped_eia923_data[(shaped_eia923_data[\"fuel_category\"] == \"natural_gas\") & (shaped_eia923_data[\"ba_code\"] == \"ERCO\")]\n",
+ "data_to_graph = shaped_eia923_data[\n",
+ " (shaped_eia923_data[\"fuel_category\"] == \"natural_gas\")\n",
+ " & (shaped_eia923_data[\"ba_code\"] == \"ERCO\")\n",
+ "]\n",
"\n",
"px.line(data_to_graph, x=\"datetime_utc\", y=[\"net_generation_mwh\"])"
]
@@ -303,7 +374,9 @@
"metadata": {},
"outputs": [],
"source": [
- "national_validation = validation.validate_national_imputation_method(hourly_profiles, year)\n",
+ "national_validation = validation.validate_national_imputation_method(\n",
+ " hourly_profiles, year\n",
+ ")\n",
"national_validation.groupby([\"fuel_category\"]).mean()[\"imputed_profile\"]"
]
},
@@ -318,11 +391,7 @@
" [\"ba_code\", \"fuel_category\", \"report_date\", \"profile_method\"]\n",
"].drop_duplicates().drop(columns=[\"ba_code\"]).pivot_table(\n",
" index=\"fuel_category\", columns=\"profile_method\", aggfunc=\"count\"\n",
- ").fillna(\n",
- " 0\n",
- ").astype(\n",
- " int\n",
- ")"
+ ").fillna(0).astype(int)"
]
}
],
diff --git a/notebooks/validation/validate_negative_profiles.ipynb b/notebooks/validation/validate_negative_profiles.ipynb
index 486b1322..27c44e04 100644
--- a/notebooks/validation/validate_negative_profiles.ipynb
+++ b/notebooks/validation/validate_negative_profiles.ipynb
@@ -17,7 +17,7 @@
"# # Tell python where to look for modules.\n",
"import sys\n",
"\n",
- "sys.path.append('../../../open-grid-emissions/')\n",
+ "sys.path.append(\"../../../open-grid-emissions/\")\n",
"\n",
"# import local modules\n",
"import src.load_data as load_data\n",
@@ -39,12 +39,26 @@
"source": [
"# load data from csv\n",
"year = 2020\n",
- "path_prefix = ''\n",
+ "path_prefix = \"\"\n",
"\n",
- "cems = pd.read_csv(f'../data/outputs/{path_prefix}{year}/cems_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "partial_cems_scaled = pd.read_csv(f'../data/outputs/{path_prefix}{year}/partial_cems_scaled_{year}.csv', dtype=get_dtypes(), parse_dates=['datetime_utc', 'report_date'])\n",
- "eia923_allocated = pd.read_csv(f'../data/outputs/{path_prefix}{year}/eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])\n",
- "plant_attributes = pd.read_csv(f\"../data/outputs/{path_prefix}{year}/plant_static_attributes_{year}.csv\")"
+ "cems = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}{year}/cems_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "partial_cems_scaled = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}{year}/partial_cems_scaled_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
+ "eia923_allocated = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}{year}/eia923_allocated_{year}.csv\",\n",
+ " dtype=get_dtypes(),\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")\n",
+ "plant_attributes = pd.read_csv(\n",
+ " f\"../data/outputs/{path_prefix}{year}/plant_static_attributes_{year}.csv\"\n",
+ ")"
]
},
{
@@ -64,16 +78,41 @@
"# validate method\n",
"\n",
"# merge together monthly subplant totals from EIA and calculated from CEMS\n",
- "eia_netgen = eia923_allocated.groupby(['plant_id_eia',\"subplant_id\",\"report_date\"], dropna=False).sum(min_count=1)['net_generation_mwh'].reset_index().dropna(subset=\"net_generation_mwh\")\n",
- "calculated_netgen = cems.groupby(['plant_id_eia',\"subplant_id\",\"report_date\"], dropna=False).sum()['net_generation_mwh'].reset_index()\n",
- "validated_ng = eia_netgen.merge(calculated_netgen, how=\"inner\", on=['plant_id_eia',\"subplant_id\",\"report_date\"], suffixes=(\"_eia\",\"_calc\"))\n",
+ "eia_netgen = (\n",
+ " eia923_allocated.groupby(\n",
+ " [\"plant_id_eia\", \"subplant_id\", \"report_date\"], dropna=False\n",
+ " )\n",
+ " .sum(min_count=1)[\"net_generation_mwh\"]\n",
+ " .reset_index()\n",
+ " .dropna(subset=\"net_generation_mwh\")\n",
+ ")\n",
+ "calculated_netgen = (\n",
+ " cems.groupby([\"plant_id_eia\", \"subplant_id\", \"report_date\"], dropna=False)\n",
+ " .sum()[\"net_generation_mwh\"]\n",
+ " .reset_index()\n",
+ ")\n",
+ "validated_ng = eia_netgen.merge(\n",
+ " calculated_netgen,\n",
+ " how=\"inner\",\n",
+ " on=[\"plant_id_eia\", \"subplant_id\", \"report_date\"],\n",
+ " suffixes=(\"_eia\", \"_calc\"),\n",
+ ")\n",
"\n",
- "validated_ng = validated_ng.groupby(\"plant_id_eia\").sum().reset_index().drop(columns=[\"subplant_id\"])\n",
+ "validated_ng = (\n",
+ " validated_ng.groupby(\"plant_id_eia\")\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ " .drop(columns=[\"subplant_id\"])\n",
+ ")\n",
"\n",
"validated_ng = validated_ng.round(3)\n",
- "validated_ng = validated_ng[validated_ng[[\"net_generation_mwh_eia\",\"net_generation_mwh_calc\"]].sum(axis=1) != 0]\n",
+ "validated_ng = validated_ng[\n",
+ " validated_ng[[\"net_generation_mwh_eia\", \"net_generation_mwh_calc\"]].sum(axis=1) != 0\n",
+ "]\n",
"\n",
- "validated_ng['pct_error'] = (validated_ng['net_generation_mwh_calc'] - validated_ng['net_generation_mwh_eia']) / validated_ng['net_generation_mwh_eia']\n",
+ "validated_ng[\"pct_error\"] = (\n",
+ " validated_ng[\"net_generation_mwh_calc\"] - validated_ng[\"net_generation_mwh_eia\"]\n",
+ ") / validated_ng[\"net_generation_mwh_eia\"]\n",
"validated_ng.sort_values(by=\"pct_error\")"
]
},
@@ -91,9 +130,13 @@
"outputs": [],
"source": [
"# what is the most negative\n",
- "negative_hourly = eia923_allocated.copy()[[\"plant_id_eia\",\"generator_id\",\"report_date\",\"net_generation_mwh\"]].dropna()\n",
+ "negative_hourly = eia923_allocated.copy()[\n",
+ " [\"plant_id_eia\", \"generator_id\", \"report_date\", \"net_generation_mwh\"]\n",
+ "].dropna()\n",
"negative_hourly[\"hours\"] = negative_hourly.report_date.dt.daysinmonth * 24\n",
- "negative_hourly[\"avg_generation_mw\"] = negative_hourly[\"net_generation_mwh\"] / negative_hourly[\"hours\"]\n",
+ "negative_hourly[\"avg_generation_mw\"] = (\n",
+ " negative_hourly[\"net_generation_mwh\"] / negative_hourly[\"hours\"]\n",
+ ")\n",
"negative_hourly.sort_values(by=\"avg_generation_mw\")"
]
},
@@ -127,24 +170,20 @@
" \"co2_mass_lb_adjusted\",\n",
"]\n",
"all_data = all_data.assign(\n",
- " reported_eia923=lambda x: np.where(\n",
- " x[columns_to_test].notnull().all(axis=1), 1, 0\n",
- " )\n",
+ " reported_eia923=lambda x: np.where(x[columns_to_test].notnull().all(axis=1), 1, 0)\n",
")\n",
"\n",
"cems_monthly = (\n",
- " cems_test.groupby(\n",
- " [\"plant_id_eia\", \"subplant_id\",\"report_date\"], dropna=False\n",
- " )\n",
- " .count()[\"datetime_utc\"].reset_index())\n",
+ " cems_test.groupby([\"plant_id_eia\", \"subplant_id\", \"report_date\"], dropna=False)\n",
+ " .count()[\"datetime_utc\"]\n",
+ " .reset_index()\n",
+ ")\n",
"\n",
"cems_monthly[\"hourly_data_source\"] = \"cems\"\n",
"\n",
"# merge in the data source column from CEMS\n",
"all_data = all_data.merge(\n",
- " cems_monthly[\n",
- " [\"plant_id_eia\", \"subplant_id\", \"report_date\", \"hourly_data_source\"]\n",
- " ],\n",
+ " cems_monthly[[\"plant_id_eia\", \"subplant_id\", \"report_date\", \"hourly_data_source\"]],\n",
" how=\"left\",\n",
" on=[\"plant_id_eia\", \"subplant_id\", \"report_date\"],\n",
" validate=\"m:1\",\n",
@@ -153,7 +192,11 @@
"# for the remaining plants, identify the hourly data source as EIA\n",
"all_data[\"hourly_data_source\"] = all_data[\"hourly_data_source\"].fillna(\"eia\")\n",
"\n",
- "all_data.loc[(all_data[\"hourly_data_source\"] == \"cems\") & (~all_data[\"net_generation_mwh\"].isna()), \"hourly_data_source\"] = \"both\"\n",
+ "all_data.loc[\n",
+ " (all_data[\"hourly_data_source\"] == \"cems\")\n",
+ " & (~all_data[\"net_generation_mwh\"].isna()),\n",
+ " \"hourly_data_source\",\n",
+ "] = \"both\"\n",
"\n",
"all_data"
]
@@ -164,7 +207,9 @@
"metadata": {},
"outputs": [],
"source": [
- "gtn_conversions = gross_to_net_generation.calculate_gross_to_net_conversion_factors(cems_test, eia_test, plant_attributes, year)\n",
+ "gtn_conversions = gross_to_net_generation.calculate_gross_to_net_conversion_factors(\n",
+ " cems_test, eia_test, plant_attributes, year\n",
+ ")\n",
"\n",
"factors_to_use = gross_to_net_generation.filter_gtn_conversion_factors(gtn_conversions)\n",
"\n",
@@ -211,17 +256,23 @@
" cems_test[\"gross_generation_mwh\"] + cems_test[\"annual_plant_shift_mw\"]\n",
")\n",
"\n",
- "cems_test.loc[cems_test[\"net_generation_mwh\"].isna(), \"gtn_method\"] = \"4_annual_plant_ratio\"\n",
+ "cems_test.loc[\n",
+ " cems_test[\"net_generation_mwh\"].isna(), \"gtn_method\"\n",
+ "] = \"4_annual_plant_ratio\"\n",
"cems_test[\"net_generation_mwh\"] = cems_test[\"net_generation_mwh\"].fillna(\n",
" cems_test[\"gross_generation_mwh\"] * cems_test[\"annual_plant_ratio\"]\n",
")\n",
"\n",
- "cems_test.loc[cems_test[\"net_generation_mwh\"].isna(), \"gtn_method\"] = \"5_annual_fuel_ratio\"\n",
+ "cems_test.loc[\n",
+ " cems_test[\"net_generation_mwh\"].isna(), \"gtn_method\"\n",
+ "] = \"5_annual_fuel_ratio\"\n",
"cems_test[\"net_generation_mwh\"] = cems_test[\"net_generation_mwh\"].fillna(\n",
" cems_test[\"gross_generation_mwh\"] * cems_test[\"annual_fuel_ratio\"]\n",
")\n",
"\n",
- "cems_test.loc[cems_test[\"net_generation_mwh\"].isna(), \"gtn_method\"] = \"6_gross_equals_net\"\n",
+ "cems_test.loc[\n",
+ " cems_test[\"net_generation_mwh\"].isna(), \"gtn_method\"\n",
+ "] = \"6_gross_equals_net\"\n",
"cems_test[\"net_generation_mwh\"] = cems_test[\"net_generation_mwh\"].fillna(\n",
" cems_test[\"gross_generation_mwh\"]\n",
")\n",
@@ -229,7 +280,7 @@
"# drop intermediate columns\n",
"cems_test = cems_test.drop(\n",
" columns=[\n",
- " #\"data_source\",\n",
+ " # \"data_source\",\n",
" \"annual_subplant_shift_mw\",\n",
" \"annual_plant_shift_mw\",\n",
" \"annual_subplant_ratio\",\n",
@@ -254,7 +305,9 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_test.groupby([\"data_source\",\"subplant_id\"], dropna=False).sum()[[\"gross_generation_mwh\",\"net_generation_mwh\"]].reset_index()"
+ "cems_test.groupby([\"data_source\", \"subplant_id\"], dropna=False).sum()[\n",
+ " [\"gross_generation_mwh\", \"net_generation_mwh\"]\n",
+ "].reset_index()"
]
},
{
@@ -263,7 +316,9 @@
"metadata": {},
"outputs": [],
"source": [
- "eia_test.groupby([\"hourly_data_source\",\"subplant_id\"], dropna=False).sum()[\"net_generation_mwh\"].reset_index()"
+ "eia_test.groupby([\"hourly_data_source\", \"subplant_id\"], dropna=False).sum()[\n",
+ " \"net_generation_mwh\"\n",
+ "].reset_index()"
]
},
{
@@ -272,7 +327,9 @@
"metadata": {},
"outputs": [],
"source": [
- "factors_to_use.groupby([\"data_source\",\"subplant_id\"], dropna=False).sum()[\"net_generation_mwh\"].reset_index()"
+ "factors_to_use.groupby([\"data_source\", \"subplant_id\"], dropna=False).sum()[\n",
+ " \"net_generation_mwh\"\n",
+ "].reset_index()"
]
},
{
@@ -297,9 +354,24 @@
"metadata": {},
"outputs": [],
"source": [
- "data_to_graph = hourly_profiles[(hourly_profiles[\"fuel_category\"] == \"coal\") & (hourly_profiles[\"ba_code\"] == \"MISO\")]\n",
+ "data_to_graph = hourly_profiles[\n",
+ " (hourly_profiles[\"fuel_category\"] == \"coal\")\n",
+ " & (hourly_profiles[\"ba_code\"] == \"MISO\")\n",
+ "]\n",
"\n",
- "px.line(data_to_graph, x=\"datetime_utc\", y=[\"eia930_profile\",\"cems_profile\",\"residual_profile\",\"scaled_residual_profile\",\"shifted_residual_profile\",\"imputed_profile\",\"profile\"])"
+ "px.line(\n",
+ " data_to_graph,\n",
+ " x=\"datetime_utc\",\n",
+ " y=[\n",
+ " \"eia930_profile\",\n",
+ " \"cems_profile\",\n",
+ " \"residual_profile\",\n",
+ " \"scaled_residual_profile\",\n",
+ " \"shifted_residual_profile\",\n",
+ " \"imputed_profile\",\n",
+ " \"profile\",\n",
+ " ],\n",
+ ")"
]
},
{
@@ -311,9 +383,13 @@
"plant_to_test = 3399\n",
"subplant = 1\n",
"\n",
- "cems_to_graph = cems[(cems[\"plant_id_eia\"] == plant_to_test) & (cems[\"subplant_id\"] == subplant)]\n",
+ "cems_to_graph = cems[\n",
+ " (cems[\"plant_id_eia\"] == plant_to_test) & (cems[\"subplant_id\"] == subplant)\n",
+ "]\n",
"\n",
- "px.line(cems_to_graph, x=\"datetime_utc\", y=[\"gross_generation_mwh\",\"net_generation_mwh\"])"
+ "px.line(\n",
+ " cems_to_graph, x=\"datetime_utc\", y=[\"gross_generation_mwh\", \"net_generation_mwh\"]\n",
+ ")"
]
}
],
diff --git a/notebooks/validation/validate_vs_egrid.ipynb b/notebooks/validation/validate_vs_egrid.ipynb
index 70ec8518..8e9bb8da 100644
--- a/notebooks/validation/validate_vs_egrid.ipynb
+++ b/notebooks/validation/validate_vs_egrid.ipynb
@@ -16,9 +16,10 @@
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
- "# Tell python where to look for modules. \n",
+ "# Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"# import local modules\n",
"import load_data\n",
@@ -75,7 +76,7 @@
")\n",
"\n",
"# Load the eGRID plant table\n",
- "egrid_plant = validation.load_egrid_plant_file(year)\n"
+ "egrid_plant = validation.load_egrid_plant_file(year)"
]
},
{
@@ -97,7 +98,7 @@
") = validation.identify_plants_missing_from_our_calculations(\n",
" egrid_plant, annual_plant_results, year\n",
")\n",
- "missing_from_calc\n"
+ "missing_from_calc"
]
},
{
@@ -129,7 +130,7 @@
" \"fuel_consumed_for_electricity_mmbtu\",\n",
" \"fuel_consumed_mmbtu\",\n",
" ],\n",
- "]\n"
+ "]"
]
},
{
@@ -163,7 +164,7 @@
" (double_ids[\"net_generation_mwh_calc\"] - double_ids[\"net_generation_mwh_egrid\"])\n",
" / double_ids[\"net_generation_mwh_egrid\"]\n",
").round(3)\n",
- "double_ids\n"
+ "double_ids"
]
},
{
@@ -175,7 +176,7 @@
"# compare egrid vs eia plant ids\n",
"annual_plant_results[\n",
" annual_plant_results[\"plant_id_egrid\"].duplicated(keep=False)\n",
- "].groupby([\"plant_id_egrid\", \"plant_id_eia\"]).sum()\n"
+ "].groupby([\"plant_id_egrid\", \"plant_id_eia\"]).sum()"
]
},
{
@@ -191,7 +192,9 @@
"metadata": {},
"outputs": [],
"source": [
- "ba_code_match = egrid_plant.set_index(\"plant_id_eia\")[[\"plant_name_eia\", \"ba_code\"]].merge(\n",
+ "ba_code_match = egrid_plant.set_index(\"plant_id_eia\")[\n",
+ " [\"plant_name_eia\", \"ba_code\"]\n",
+ "].merge(\n",
" annual_plant_results.set_index(\"plant_id_eia\")[[\"ba_code\"]],\n",
" how=\"inner\",\n",
" left_index=True,\n",
@@ -201,7 +204,7 @@
"\n",
"# plants with missing ba code\n",
"# ba_code_match[(ba_code_match['ba_code_calc'].isna()) & ~(ba_code_match['ba_code_egrid'].isna())]\n",
- "ba_code_match[ba_code_match[\"ba_code_calc\"] != ba_code_match[\"ba_code_egrid\"]]\n"
+ "ba_code_match[ba_code_match[\"ba_code_calc\"] != ba_code_match[\"ba_code_egrid\"]]"
]
},
{
@@ -214,7 +217,7 @@
"ba_code_match[\n",
" (ba_code_match[\"ba_code_calc\"] != ba_code_match[\"ba_code_egrid\"])\n",
" & ~(ba_code_match[\"ba_code_egrid\"].isna())\n",
- "]\n"
+ "]"
]
},
{
@@ -242,7 +245,7 @@
"\n",
"fuel_match[\n",
" fuel_match[\"plant_primary_fuel_egrid\"] != fuel_match[\"plant_primary_fuel_calc\"]\n",
- "]\n"
+ "]"
]
},
{
@@ -314,7 +317,7 @@
" eia923_allocated,\n",
" pudl_out,\n",
" PLANTS_MISSING_FROM_EGRID,\n",
- ")\n"
+ ")"
]
},
{
@@ -350,7 +353,7 @@
"comparison_count, compared = validation.compare_plant_level_results_to_egrid(\n",
" segment_to_compare, egrid_plant, PLANTS_MISSING_FROM_EGRID\n",
")\n",
- "comparison_count\n"
+ "comparison_count"
]
},
{
@@ -359,7 +362,7 @@
"metadata": {},
"outputs": [],
"source": [
- "validation.compare_egrid_fuel_total(segment_to_compare, egrid_plant).sum()\n"
+ "validation.compare_egrid_fuel_total(segment_to_compare, egrid_plant).sum()"
]
},
{
@@ -379,7 +382,7 @@
"comparison_count, compared = validation.compare_plant_level_results_to_egrid(\n",
" segment_to_compare, egrid_plant, PLANTS_MISSING_FROM_EGRID\n",
")\n",
- "comparison_count\n"
+ "comparison_count"
]
},
{
@@ -413,12 +416,32 @@
"metric = \"so2_mass_lb\"\n",
"status = \"<50%\"\n",
"\n",
- "comparison_df = comparison_df.merge(egrid_plant.set_index(\"plant_id_egrid\")[[metric]], how=\"left\", left_index=True, right_index=True, suffixes=(None,\"_egrid\"))\n",
- "comparison_df = comparison_df.merge(annual_plant_results.set_index(\"plant_id_egrid\")[[metric]], how=\"left\", left_index=True, right_index=True, suffixes=(None,\"_calc\"))\n",
+ "comparison_df = comparison_df.merge(\n",
+ " egrid_plant.set_index(\"plant_id_egrid\")[[metric]],\n",
+ " how=\"left\",\n",
+ " left_index=True,\n",
+ " right_index=True,\n",
+ " suffixes=(None, \"_egrid\"),\n",
+ ")\n",
+ "comparison_df = comparison_df.merge(\n",
+ " annual_plant_results.set_index(\"plant_id_egrid\")[[metric]],\n",
+ " how=\"left\",\n",
+ " left_index=True,\n",
+ " right_index=True,\n",
+ " suffixes=(None, \"_calc\"),\n",
+ ")\n",
"\n",
"# show the data\n",
- "columns_to_show = [\"plant_name_eia\", \"ba_code\", \"state\", metric, f\"{metric}_status\", f\"{metric}_egrid\", f\"{metric}_calc\"]\n",
- "comparison_df.loc[(comparison_df[f\"{metric}_status\"] == status), columns_to_show]\n"
+ "columns_to_show = [\n",
+ " \"plant_name_eia\",\n",
+ " \"ba_code\",\n",
+ " \"state\",\n",
+ " metric,\n",
+ " f\"{metric}_status\",\n",
+ " f\"{metric}_egrid\",\n",
+ " f\"{metric}_calc\",\n",
+ "]\n",
+ "comparison_df.loc[(comparison_df[f\"{metric}_status\"] == status), columns_to_show]"
]
},
{
@@ -459,7 +482,9 @@
"# aggregate the plant data up to the BA level\n",
"egrid_plant_ba_agg = egrid_plant.groupby([\"ba_code\"]).sum()[DATA_COLUMNS].reset_index()\n",
"\n",
- "egrid_plant_ba_agg[\"generated_co2_rate_lb_per_mwh\"] = egrid_plant_ba_agg[\"co2_mass_lb\"] / egrid_plant_ba_agg[\"net_generation_mwh\"]"
+ "egrid_plant_ba_agg[\"generated_co2_rate_lb_per_mwh\"] = (\n",
+ " egrid_plant_ba_agg[\"co2_mass_lb\"] / egrid_plant_ba_agg[\"net_generation_mwh\"]\n",
+ ")"
]
},
{
@@ -488,7 +513,9 @@
"\n",
"calculated_ba = pd.concat(calculated_ba, axis=0)\n",
"\n",
- "calculated_ba[\"generated_co2_rate_lb_per_mwh\"] = calculated_ba[\"co2_mass_lb\"] / calculated_ba[\"net_generation_mwh\"]\n"
+ "calculated_ba[\"generated_co2_rate_lb_per_mwh\"] = (\n",
+ " calculated_ba[\"co2_mass_lb\"] / calculated_ba[\"net_generation_mwh\"]\n",
+ ")"
]
},
{
@@ -503,7 +530,7 @@
" - egrid_plant_ba_agg.set_index(\"ba_code\").replace(0, 0.1)\n",
" )\n",
" / egrid_plant_ba_agg.set_index(\"ba_code\").replace(0, 0.1)\n",
- ").round(2)\n"
+ ").round(2)"
]
},
{
@@ -583,7 +610,7 @@
"]\n",
"\n",
"with pd.option_context(\"display.max_rows\", None, \"display.max_columns\", None):\n",
- " display(ba_metric[~(ba_metric[columns_to_check] == 0).all(axis=1)])\n"
+ " display(ba_metric[~(ba_metric[columns_to_check] == 0).all(axis=1)])"
]
},
{
@@ -600,7 +627,7 @@
"metadata": {},
"outputs": [],
"source": [
- "plant_to_explore = 58223\n"
+ "plant_to_explore = 58223"
]
},
{
@@ -609,7 +636,7 @@
"metadata": {},
"outputs": [],
"source": [
- "egrid_plant[egrid_plant[\"plant_id_eia\"] == plant_to_explore]\n"
+ "egrid_plant[egrid_plant[\"plant_id_eia\"] == plant_to_explore]"
]
},
{
@@ -618,7 +645,7 @@
"metadata": {},
"outputs": [],
"source": [
- "annual_plant_results[annual_plant_results[\"plant_id_eia\"] == plant_to_explore]\n"
+ "annual_plant_results[annual_plant_results[\"plant_id_eia\"] == plant_to_explore]"
]
},
{
@@ -627,7 +654,7 @@
"metadata": {},
"outputs": [],
"source": [
- "eia923_allocated[eia923_allocated[\"plant_id_eia\"] == plant_to_explore]\n"
+ "eia923_allocated[eia923_allocated[\"plant_id_eia\"] == plant_to_explore]"
]
},
{
@@ -639,7 +666,7 @@
"eia923_allocated.loc[\n",
" eia923_allocated[\"plant_id_eia\"] == plant_to_explore,\n",
" [\"generator_id\", \"subplant_id\"],\n",
- "].drop_duplicates()\n"
+ "].drop_duplicates()"
]
}
],
diff --git a/notebooks/visualization/map_visualization.ipynb b/notebooks/visualization/map_visualization.ipynb
index 73284fbf..0f5c85e1 100644
--- a/notebooks/visualization/map_visualization.ipynb
+++ b/notebooks/visualization/map_visualization.ipynb
@@ -1,623 +1,788 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Maps for announcement blog post\n",
- "\n",
- "Visualization 2: The carbon intensity of consumed electricity differs from generated electricity\n",
- "* Show a static carbon flow map focused on a single BA plus all directly-interconnected BAs\n",
- "* Pick an hour when there is some particularly dirty electricity getting imported\n",
- "* Each BA would be represented by a bubble, where the color changes based on carbon intensity, and carbon flows would be represented by colored arrows between the bubbles. \n",
- "* To illustrate the difference between produced and consumed, we might want to have a pair of bubbles for each BA - one that shows the produced CI and one that shows the consumed CI. If we do this, we probably don’t want to vary the size of each bubble based on total generation. Or maybe we could do a split bubble - the top half shows produced CI and the bottom shows consumed CI.\n",
- "\n",
- "\n",
- "Visualization 3: Animating hourly and consumed emissions for the whole county\n",
- "* This animation should put the previous two concepts together and show how carbon flows and how CI changes across the entire country for a single day (or a week?)\n",
- "* We could also potentially have two maps side by side: one that shows annual averages in bubbles (with no carbon flow), and one that shows the animated hourly flow (to really draw the distinction between annual and hourly datasets)\n",
- "\n",
- "\n",
- "### Ref for making gif: \n",
- "`https://stackoverflow.com/questions/753190/programmatically-generate-video-or-animated-gif-in-python`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import plotly.express as px\n",
- "import plotly.graph_objects as go\n",
- "from plotly.colors import * \n",
- "import plotly.io as pio\n",
- "import os\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "from PIL import Image\n",
- "import imageio"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%reload_ext autoreload\n",
- "%autoreload 2\n",
- "\n",
- "# # Tell python where to look for modules.\n",
- "import sys\n",
- "\n",
- "sys.path.append(\"../../src/\")\n",
- "\n",
- "import output_data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "ba_coords = pd.read_csv(\"resources/ba_coords.csv\", index_col=0, dtype={\"cx\":np.float64, \"cy\":np.float64})\n",
- "ba_meta = pd.read_csv(\"../../data/manual/ba_reference.csv\", index_col=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Note: 150+ BAs are not in ba_coords\n",
- "ba_list = ba_meta[(ba_meta.ba_category != \"misellaneous\") & (ba_meta.us_ba) & (ba_meta.index.isin(ba_coords.index))].index\n",
- "ba_list = [ba for ba in ba_list if (f\"{ba}.csv\" in os.listdir(\"../../data/results/2020/power_sector_data/hourly/us_units/\"))]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cleaned_io = pd.read_csv(\"../../data/outputs/2020/eia930/eia930_elec.csv\", index_col=0, parse_dates=True)\n",
- "cleaned_io = cleaned_io[[c for c in cleaned_io.columns if \".ID.\" in c]]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "all = []\n",
- "for ba in ba_list:\n",
- " produced = pd.read_csv(f\"../../data/results/2020/power_sector_data/hourly/us_units/{ba}.csv\", index_col=\"datetime_utc\", parse_dates=True, usecols=[\"datetime_utc\",\"fuel_category\", \"net_generation_mwh\", \"generated_co2_rate_lb_per_mwh_for_electricity\"])\n",
- " produced = produced[produced.fuel_category == \"total\"]\n",
- " produced = produced.drop(columns=[\"fuel_category\"])\n",
- " \n",
- " if ba_meta.loc[ba,\"ba_category\"] == \"generation_only\":\n",
- " consumed = pd.DataFrame(index=produced.index, columns=[[\"consumed_co2_rate_lb_per_mwh_for_electricity\"]], dtype=np.float64)\n",
- " else:\n",
- " consumed = pd.read_csv(f\"../../data/results/2020/carbon_accounting/hourly/us_units/{ba}.csv\", index_col=\"datetime_utc\", parse_dates=True, usecols=[\"datetime_utc\", \"consumed_co2_rate_lb_per_mwh_for_electricity\"])\n",
- " consumed.columns = consumed.columns.get_level_values(0)\n",
- "\n",
- " both = pd.concat([produced, consumed], axis='columns')\n",
- " #both = both.loc[range_start:range_end]\n",
- " both = both.reset_index()\n",
- " both[\"BA\"] = ba\n",
- " all.append(both)\n",
- "\n",
- "all = pd.concat(all)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Add coordinates\n",
- "all = all.merge(ba_coords, how='left', validate='many_to_one', left_on=\"BA\", right_index=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# src: https://community.plotly.com/t/how-to-include-a-colorscale-for-color-of-line-graphs/38002/3 \n",
- "from ast import literal_eval\n",
- "def get_color_for_val(val, vmin, vmax, pl_colors):\n",
- " if pl_colors[0][:3] != 'rgb':\n",
- " raise ValueError('This function works only with Plotly rgb-colorscales')\n",
- " if vmin >= vmax:\n",
- " raise ValueError('vmin should be < vmax')\n",
- "\n",
- " scale = [round(k / (len(pl_colors)), 3) for k in range(len(pl_colors) + 1)]\n",
- "\n",
- " colors_01 = np.array([literal_eval(color[3:]) for color in pl_colors]) / 255 # color codes in [0,1]\n",
- "\n",
- " v = (val - vmin) / (vmax - vmin) # val is mapped to v in [0,1]\n",
- " # find two consecutive values in plotly_scale such that v is in the corresponding interval\n",
- " idx = 0\n",
- "\n",
- " while idx < (len(scale)-2) and (v > scale[idx + 1]):\n",
- " idx += 1\n",
- "\n",
- " vv = (v - scale[idx]) / (scale[idx + 1] - scale[idx])\n",
- "\n",
- " # get [0,1]-valued color code representing the rgb color corresponding to val\n",
- " if idx == len(pl_colors)-1: # Make this work when some values exceed range\n",
- " val_color01 = colors_01[idx] # color by last color \n",
- " else: \n",
- " val_color01 = colors_01[idx] + vv * (colors_01[idx + 1] - colors_01[idx])\n",
- "\n",
- " val_color_0255 = (255 * val_color01 + 0.5).astype(int)\n",
- " return f'rgb{str(tuple(val_color_0255))}'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "range_start = \"2020-08-01T04:00+00\"\n",
- "range_end = \"2020-08-3T04:00+00\"\n",
- "\n",
- "#range_start = \"2020-07-21T12:00+00\"\n",
- "#range_end = \"2020-07-23T12:00+00\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "dir_name = \"maps\"\n",
- "\n",
- "c_max = 2200 #np.floor(all.generated_co2_rate_lb_per_mwh_for_electricity.max() + 100)\n",
- "\n",
- "hours = all.datetime_utc[(all.datetime_utc < pd.to_datetime(range_end)) & (all.datetime_utc > pd.to_datetime(range_start)) & (all.BA == \"CISO\")]\n",
- "for hour in hours:\n",
- " print(hour, end=\"...\")\n",
- " io_toplot = cleaned_io.loc[hour]\n",
- " toplot = all[all.datetime_utc == hour]\n",
- " fig = go.Figure()\n",
- "\n",
- " toplot.loc[toplot.net_generation_mwh < 1, \"net_generation_mwh\"] = 1\n",
- " sizes = np.log(toplot.net_generation_mwh)/np.log(1.5)\n",
- " offset = sizes/2.5\n",
- "\n",
- " colorscale = diverging.RdYlGn_r\n",
- " #colorscale = cmocean.solar_r\n",
- "\n",
- " ### From when \n",
- " # max_width = io_toplot.max()\n",
- " # width_factor = 8/max_width\n",
- " #width_factor = 1/200\n",
- " for name, val in io_toplot.iteritems():\n",
- " if val <= 0: \n",
- " continue \n",
- " bas = name.split(\".\")[1].split(\"-\")\n",
- " (ba1, ba2) = bas\n",
- "\n",
- " next = False\n",
- " for ba in bas: \n",
- " if ba not in ba_coords.index:\n",
- " next=True\n",
- " if ba not in toplot.BA.unique():\n",
- " next=True\n",
- " if next:\n",
- " continue\n",
- "\n",
- " color = toplot.loc[toplot.BA == ba1, \"generated_co2_rate_lb_per_mwh_for_electricity\"].to_numpy()[0]\n",
- "\n",
- " fig.add_trace(\n",
- " go.Scatter(x = ba_coords.loc[bas,\"cx\"], y = ba_coords.loc[bas,\"cy\"], opacity=1.0,\n",
- " mode=\"lines\", line = dict(color=get_color_for_val(color, 0, c_max, colorscale), width=2), showlegend=False\n",
- " )\n",
- " )\n",
- "\n",
- " ################################# Plot BAs \n",
- " toplot.loc[toplot.net_generation_mwh < 1, \"net_generation_mwh\"] = 1\n",
- " sizes = np.log(toplot.net_generation_mwh)/np.log(1.5)\n",
- " offset = sizes/1.6\n",
- "\n",
- " # Zero-generation BAs: plot under BAs with non-zero gen \n",
- " zero_gen_bas = (toplot.net_generation_mwh == 1) # (we set to 1 above to make log work)\n",
- " fig.add_trace(\n",
- " go.Scatter(x=toplot.loc[zero_gen_bas,\"cx\"], y=toplot.loc[zero_gen_bas,\"cy\"]+(2/2.5), mode=\"markers\", \n",
- " hoverinfo=\"text\", text=toplot.loc[zero_gen_bas,\"BA\"], \n",
- " marker_symbol=\"triangle-up\",\n",
- " marker=dict(color='lightgrey', \n",
- " line=dict(width=1, color='DarkSlateGrey'),\n",
- " size=7, opacity=1.0,\n",
- " sizemode='diameter'),\n",
- " showlegend=False \n",
- " )\n",
- " )\n",
- "\n",
- " fig.add_trace(\n",
- " go.Scatter(x=toplot.cx, y=toplot.cy, mode=\"markers\", hoverinfo=\"text\", text=toplot.BA, \n",
- " marker_symbol=\"triangle-up\", \n",
- " opacity=1.0,\n",
- " marker=dict(color=toplot.generated_co2_rate_lb_per_mwh_for_electricity, size=sizes,\n",
- " sizemode='diameter', cmin=0, cmax=c_max, opacity=1.0,\n",
- " line=dict(width=1, color='DarkSlateGrey'),\n",
- " colorscale=\"rdylgn_r\"),\n",
- " name=\"Generated\", \n",
- " showlegend=False\n",
- " )\n",
- " )\n",
- " consumed_toplot = ~toplot.consumed_co2_rate_lb_per_mwh_for_electricity.isna()\n",
- " fig.add_trace(\n",
- " go.Scatter(x=toplot.loc[consumed_toplot,\"cx\"], y=toplot.loc[consumed_toplot,\"cy\"]+offset[consumed_toplot], mode=\"markers\", \n",
- " hoverinfo=\"text\", text=toplot.loc[consumed_toplot,\"BA\"], \n",
- " marker_symbol=\"triangle-down\",\n",
- " marker=dict(color=toplot.loc[consumed_toplot,\"consumed_co2_rate_lb_per_mwh_for_electricity\"], \n",
- " size=sizes[consumed_toplot], opacity=1.0,\n",
- " line=dict(width=1, color='DarkSlateGrey'),\n",
- " sizemode='diameter', cmin=0, cmax=c_max, \n",
- " colorbar=dict(\n",
- " title=\"Emission rate
(lbs/MWh)\", orientation='v', len=.8, \n",
- " thickness=20, yanchor='bottom', y=0, xpad=20\n",
- " ),\n",
- " colorscale=\"rdylgn_r\"\n",
- " ),\n",
- " name=\"Consumed\", \n",
- " showlegend=False \n",
- " )\n",
- " )\n",
- "\n",
- " # Legends: don't want colored markers\n",
- " fig.add_trace(\n",
- " go.Scatter(x=[-10], y=[-10], mode=\"markers\", \n",
- " marker_symbol=\"triangle-up\",\n",
- " marker=dict(color='white', line=dict(width=2, color='DarkSlateGrey'), size=10),\n",
- " name=\"Generated\", \n",
- " )\n",
- " )\n",
- " fig.add_trace(\n",
- " go.Scatter(x=[-10], y=[-10], mode=\"markers\", \n",
- " marker_symbol=\"triangle-down\",\n",
- " marker=dict(color='white', line=dict(width=2, color='DarkSlateGrey'), size=10),\n",
- " name=\"Consumed\", \n",
- " )\n",
- " )\n",
- " fig.update_yaxes(range=(550,0)) # autorange=\"reversed\")\n",
- " fig.update_xaxes(range=(0,800))\n",
- "\n",
- " # Add images\n",
- " fig.add_layout_image(\n",
- " dict(\n",
- " source=Image.open(\"resources/usa.png\"),\n",
- " xref=\"x\",\n",
- " yref=\"y\",\n",
- " x=10,\n",
- " y=0,\n",
- " sizex=790,\n",
- " sizey=550,\n",
- " sizing=\"stretch\",\n",
- " opacity=0.5,\n",
- " layer=\"below\")\n",
- " )\n",
- "\n",
- " # Add images\n",
- " fig.add_layout_image(\n",
- " dict(\n",
- " source=Image.open(\"resources/legend_bottom_smaller.png\"),\n",
- " xref=\"x\",\n",
- " yref=\"y\",\n",
- " x=-20,\n",
- " y=400,\n",
- " sizex=260,\n",
- " sizey=200,\n",
- " sizing=\"contain\",\n",
- " opacity=1.0,\n",
- " layer=\"below\")\n",
- " )\n",
- "\n",
- " # Set templates\n",
- " fig.update_layout(template=\"plotly_white\", width=800, height=600, \n",
- " yaxis_visible=False, xaxis_visible=False,\n",
- " title=hour.tz_convert(\"US/Eastern\").strftime(\"%B %-d, %Y - %-I:00 %p ET\"))\n",
- " #fig.show()\n",
- " os.makedirs(f\"outputs/{dir_name}/\", exist_ok=True)\n",
- " fig.write_image(f\"outputs/{dir_name}/{hour}.png\", scale=2)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "## Make gif \n",
- "images = []\n",
- "files = [f for f in os.listdir(f\"outputs/{dir_name}/\") if \".png\" in f]\n",
- "files.sort()\n",
- "for f in files:\n",
- " images.append(imageio.imread(f\"outputs/{dir_name}/\"+f))\n",
- "imageio.mimsave(f\"outputs/movie_{dir_name}.gif\", images)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Now just CISO and neighbors. \n",
- "\n",
- "# Get list of CISO neighbors \n",
- "ciso_interchanges = [c for c in cleaned_io.columns if \"CISO\" in c]\n",
- "ciso_bas = []\n",
- "for ci in ciso_interchanges: \n",
- " ba1, ba2 = ci.split(\".\")[1].split(\"-\")\n",
- " if ba1 not in ciso_bas: \n",
- " ciso_bas.append(ba1)\n",
- " if ba2 not in ciso_bas:\n",
- " ciso_bas.append(ba2)\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Identify day with max rate difference in CISO \n",
- "tester = (all[all.BA==\"CISO\"]).copy()\n",
- "tester[\"difference\"] = tester.consumed_co2_rate_lb_per_mwh_for_electricity - tester.generated_co2_rate_lb_per_mwh_for_electricity\n",
- "tester.difference.abs().max()\n",
- "tester[tester.difference == tester.difference.abs().max()]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hour with max CISO difference generated / consumed \n",
- "hour = '2020-09-25 12:00:00+00:00'\n",
- "\n",
- "io_toplot = cleaned_io.loc[hour, ciso_interchanges]\n",
- "toplot = all[all.datetime_utc == hour]\n",
- "toplot = toplot[toplot.BA.isin(ciso_bas)]\n",
- "fig = go.Figure()\n",
- "\n",
- "colorscale = diverging.RdYlGn_r\n",
- "#colorscale = cmocean.solar_r\n",
- "\n",
- "c_max = np.floor(toplot.generated_co2_rate_lb_per_mwh_for_electricity.max() + 100)\n",
- "\n",
- "### From when \n",
- "# max_width = io_toplot.max()\n",
- "# width_factor = 8/max_width\n",
- "#width_factor = 1/200\n",
- "for name, val in io_toplot.iteritems():\n",
- " if val <= 0: \n",
- " continue \n",
- " bas = name.split(\".\")[1].split(\"-\")\n",
- " (ba1, ba2) = bas\n",
- "\n",
- " next = False\n",
- " for ba in bas: \n",
- " if ba not in ba_coords.index:\n",
- " next=True\n",
- " if ba not in toplot.BA.unique():\n",
- " next=True\n",
- " if next:\n",
- " continue\n",
- "\n",
- " color = toplot.loc[toplot.BA == ba1, \"generated_co2_rate_lb_per_mwh_for_electricity\"].to_numpy()[0]\n",
- " print(color)\n",
- "\n",
- " fig.add_trace(\n",
- " go.Scatter(x = ba_coords.loc[bas,\"cx\"], y = ba_coords.loc[bas,\"cy\"], opacity=1.0,\n",
- " mode=\"lines\", line = dict(color=get_color_for_val(color, 0, c_max, colorscale), width=2), showlegend=False\n",
- " )\n",
- " )\n",
- "\n",
- "################################# Plot BAs \n",
- "toplot.loc[toplot.net_generation_mwh < 1, \"net_generation_mwh\"] = 1\n",
- "sizes = np.log(toplot.net_generation_mwh)/np.log(1.5)\n",
- "offset = sizes/1.6\n",
- "fig.add_trace(\n",
- " go.Scatter(x=toplot.cx, y=toplot.cy-offset, mode=\"markers\", hoverinfo=\"text\", text=toplot.BA, \n",
- " marker_symbol=\"triangle-up\", \n",
- " opacity=1.0,\n",
- " marker=dict(line=dict(width=1, color='DarkSlateGrey'),\n",
- " color=toplot.generated_co2_rate_lb_per_mwh_for_electricity, size=sizes,\n",
- " sizemode='diameter', cmin=0, cmax=c_max, opacity=1.0,\n",
- " colorscale=\"rdylgn_r\"),\n",
- " name=\"Generated\", \n",
- " showlegend=False\n",
- " )\n",
- ")\n",
- "fig.add_trace(\n",
- " go.Scatter(x=toplot.cx, y=toplot.cy, mode=\"markers\", hoverinfo=\"text\", text=toplot.BA, \n",
- " marker_symbol=\"triangle-down\",\n",
- " marker=dict(color=toplot.consumed_co2_rate_lb_per_mwh_for_electricity, size=sizes, opacity=1.0,\n",
- " line=dict(width=1, color='DarkSlateGrey'),\n",
- " sizemode='diameter', cmin=0, cmax=c_max, colorbar=dict(\n",
- " title=\"Emission rate
(lbs/MWh)\", orientation='v', len=.8, thickness=20, yanchor='bottom', y=0, xpad=20\n",
- " ),\n",
- " colorscale=\"rdylgn_r\"),\n",
- " name=\"Consumed\", \n",
- " showlegend=False \n",
- " )\n",
- ")\n",
- "\n",
- "\n",
- "# Legends: don't want colored markers\n",
- "# Legends: don't want colored markers\n",
- "fig.add_trace(\n",
- " go.Scatter(x=[-10], y=[-10], mode=\"markers\", \n",
- " marker_symbol=\"triangle-up\",\n",
- " marker=dict(color='white', line=dict(width=2, color='DarkSlateGrey'), size=10),\n",
- " name=\"Generated\", \n",
- " )\n",
- ")\n",
- "fig.add_trace(\n",
- " go.Scatter(x=[-10], y=[-10], mode=\"markers\", \n",
- " marker_symbol=\"triangle-down\",\n",
- " marker=dict(color='white', line=dict(width=2, color='DarkSlateGrey'), size=10),\n",
- " name=\"Consumed\", \n",
- " )\n",
- ")\n",
- "fig.update_yaxes(range=(500,0)) # autorange=\"reversed\")\n",
- "fig.update_xaxes(range=(0,200))\n",
- "\n",
- "## loop through the labels and add them as annotations\n",
- "for x in zip(toplot.BA, toplot.cx, toplot.cy):\n",
- " left_bas = [\"BANC\",\"TIDC\",\"CISO\",\"LDWP\",\"IID\"]\n",
- " delta = (-12 if x[0] in left_bas else 12)\n",
- " fig.add_annotation(\n",
- " x=x[1] + delta,\n",
- " y=x[2],\n",
- " text=x[0],\n",
- " showarrow=False,\n",
- " xanchor=('right' if x[0] in left_bas else 'left')\n",
- " )\n",
- "\n",
- "# Add images\n",
- "fig.add_layout_image(\n",
- " dict(\n",
- " source=Image.open(\"resources/usa.png\"),\n",
- " xref=\"x\",\n",
- " yref=\"y\",\n",
- " x=10,\n",
- " y=0,\n",
- " sizex=790,\n",
- " sizey=550,\n",
- " sizing=\"stretch\",\n",
- " opacity=0.5,\n",
- " layer=\"below\")\n",
- ")\n",
- "\n",
- "# Add images\n",
- "fig.add_layout_image(\n",
- " dict(\n",
- " source=Image.open(\"resources/legend_bottom_smaller.png\"),\n",
- " xref=\"x\",\n",
- " yref=\"y\",\n",
- " x=-20,\n",
- " y=360,\n",
- " sizex=220,\n",
- " sizey=144,\n",
- " sizing=\"contain\",\n",
- " opacity=1.0,\n",
- " layer=\"below\")\n",
- ")\n",
- "\n",
- "# Set templates\n",
- "fig.update_layout(template=\"plotly_white\", width=400, height=550,\n",
- " yaxis_visible=False, xaxis_visible=False,\n",
- " title=pd.to_datetime(hour).tz_convert(\"US/Pacific\").strftime(\"%B %-d, %Y - %-I:00 %p PT\")\n",
- ")\n",
- "fig.show()\n",
- "fig.write_image(f\"outputs/viz2_legend.png\", scale=3) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "toplot[\"difference\"] = (toplot.generated_co2_rate_lb_per_mwh_for_electricity - toplot.consumed_co2_rate_lb_per_mwh_for_electricity)/toplot.generated_co2_rate_lb_per_mwh_for_electricity\n",
- "toplot"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "all[\"difference\"] = ((all.generated_co2_rate_lb_per_mwh_for_electricity - all.consumed_co2_rate_lb_per_mwh_for_electricity).abs())* all.net_generation_mwh\n",
- "px.line(all.groupby(\"datetime_utc\").mean()[\"difference\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "all[all.datetime_utc==\"12-01-2020 T05:00+00:00\"].to_csv(\"outputs/problem_date.csv\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "### Old code for arrows\n",
- " # # Arrows have to be added separately\n",
- " # line_size = val*width_factor\n",
- " # fig.add_annotation(\n",
- " # x=ba_coords.loc[ba2,\"cx\"], # arrows' head\n",
- " # y=ba_coords.loc[ba2,\"cy\"], # arrows' head\n",
- " # ax=ba_coords.loc[ba1,\"cx\"], # arrows' tail\n",
- " # ay=ba_coords.loc[ba1,\"cy\"], # arrows' tail\n",
- " # xref='x',\n",
- " # yref='y',\n",
- " # axref='x',\n",
- " # ayref='y',\n",
- " # text='', # if you want only the arrow\n",
- " # showarrow=True,\n",
- " # arrowhead=1,\n",
- " # arrowsize=1, #max(.3, line_size),\n",
- " # arrowwidth=1,\n",
- " # arrowcolor='royalblue'\n",
- " # )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.10.5 ('hourly_egrid')",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.5"
- },
- "orig_nbformat": 4,
- "vscode": {
- "interpreter": {
- "hash": "65c02dfd2dc2ef471c0b5088763a28c1faaa7cad28937ca42fadf51e669fd8e8"
- }
- }
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Maps for announcement blog post\n",
+ "\n",
+ "Visualization 2: The carbon intensity of consumed electricity differs from generated electricity\n",
+ "* Show a static carbon flow map focused on a single BA plus all directly-interconnected BAs\n",
+ "* Pick an hour when there is some particularly dirty electricity getting imported\n",
+ "* Each BA would be represented by a bubble, where the color changes based on carbon intensity, and carbon flows would be represented by colored arrows between the bubbles. \n",
+ "* To illustrate the difference between produced and consumed, we might want to have a pair of bubbles for each BA - one that shows the produced CI and one that shows the consumed CI. If we do this, we probably don’t want to vary the size of each bubble based on total generation. Or maybe we could do a split bubble - the top half shows produced CI and the bottom shows consumed CI.\n",
+ "\n",
+ "\n",
+ "Visualization 3: Animating hourly and consumed emissions for the whole county\n",
+ "* This animation should put the previous two concepts together and show how carbon flows and how CI changes across the entire country for a single day (or a week?)\n",
+ "* We could also potentially have two maps side by side: one that shows annual averages in bubbles (with no carbon flow), and one that shows the animated hourly flow (to really draw the distinction between annual and hourly datasets)\n",
+ "\n",
+ "\n",
+ "### Ref for making gif: \n",
+ "`https://stackoverflow.com/questions/753190/programmatically-generate-video-or-animated-gif-in-python`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import plotly.express as px\n",
+ "import plotly.graph_objects as go\n",
+ "from plotly.colors import *\n",
+ "import plotly.io as pio\n",
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from PIL import Image\n",
+ "import imageio"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%reload_ext autoreload\n",
+ "%autoreload 2\n",
+ "\n",
+ "# # Tell python where to look for modules.\n",
+ "import sys\n",
+ "\n",
+ "sys.path.append(\"../../src/\")\n",
+ "\n",
+ "import output_data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ba_coords = pd.read_csv(\n",
+ " \"resources/ba_coords.csv\", index_col=0, dtype={\"cx\": np.float64, \"cy\": np.float64}\n",
+ ")\n",
+ "ba_meta = pd.read_csv(\"../../data/manual/ba_reference.csv\", index_col=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Note: 150+ BAs are not in ba_coords\n",
+ "ba_list = ba_meta[\n",
+ " (ba_meta.ba_category != \"misellaneous\")\n",
+ " & (ba_meta.us_ba)\n",
+ " & (ba_meta.index.isin(ba_coords.index))\n",
+ "].index\n",
+ "ba_list = [\n",
+ " ba\n",
+ " for ba in ba_list\n",
+ " if (\n",
+ " f\"{ba}.csv\"\n",
+ " in os.listdir(\"../../data/results/2020/power_sector_data/hourly/us_units/\")\n",
+ " )\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "cleaned_io = pd.read_csv(\n",
+ " \"../../data/outputs/2020/eia930/eia930_elec.csv\", index_col=0, parse_dates=True\n",
+ ")\n",
+ "cleaned_io = cleaned_io[[c for c in cleaned_io.columns if \".ID.\" in c]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all = []\n",
+ "for ba in ba_list:\n",
+ " produced = pd.read_csv(\n",
+ " f\"../../data/results/2020/power_sector_data/hourly/us_units/{ba}.csv\",\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ " usecols=[\n",
+ " \"datetime_utc\",\n",
+ " \"fuel_category\",\n",
+ " \"net_generation_mwh\",\n",
+ " \"generated_co2_rate_lb_per_mwh_for_electricity\",\n",
+ " ],\n",
+ " )\n",
+ " produced = produced[produced.fuel_category == \"total\"]\n",
+ " produced = produced.drop(columns=[\"fuel_category\"])\n",
+ "\n",
+ " if ba_meta.loc[ba, \"ba_category\"] == \"generation_only\":\n",
+ " consumed = pd.DataFrame(\n",
+ " index=produced.index,\n",
+ " columns=[[\"consumed_co2_rate_lb_per_mwh_for_electricity\"]],\n",
+ " dtype=np.float64,\n",
+ " )\n",
+ " else:\n",
+ " consumed = pd.read_csv(\n",
+ " f\"../../data/results/2020/carbon_accounting/hourly/us_units/{ba}.csv\",\n",
+ " index_col=\"datetime_utc\",\n",
+ " parse_dates=True,\n",
+ " usecols=[\"datetime_utc\", \"consumed_co2_rate_lb_per_mwh_for_electricity\"],\n",
+ " )\n",
+ " consumed.columns = consumed.columns.get_level_values(0)\n",
+ "\n",
+ " both = pd.concat([produced, consumed], axis=\"columns\")\n",
+ " # both = both.loc[range_start:range_end]\n",
+ " both = both.reset_index()\n",
+ " both[\"BA\"] = ba\n",
+ " all.append(both)\n",
+ "\n",
+ "all = pd.concat(all)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add coordinates\n",
+ "all = all.merge(\n",
+ " ba_coords, how=\"left\", validate=\"many_to_one\", left_on=\"BA\", right_index=True\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# src: https://community.plotly.com/t/how-to-include-a-colorscale-for-color-of-line-graphs/38002/3\n",
+ "from ast import literal_eval\n",
+ "\n",
+ "\n",
+ "def get_color_for_val(val, vmin, vmax, pl_colors):\n",
+ " if pl_colors[0][:3] != \"rgb\":\n",
+ " raise ValueError(\"This function works only with Plotly rgb-colorscales\")\n",
+ " if vmin >= vmax:\n",
+ " raise ValueError(\"vmin should be < vmax\")\n",
+ "\n",
+ " scale = [round(k / (len(pl_colors)), 3) for k in range(len(pl_colors) + 1)]\n",
+ "\n",
+ " colors_01 = (\n",
+ " np.array([literal_eval(color[3:]) for color in pl_colors]) / 255\n",
+ " ) # color codes in [0,1]\n",
+ "\n",
+ " v = (val - vmin) / (vmax - vmin) # val is mapped to v in [0,1]\n",
+ " # find two consecutive values in plotly_scale such that v is in the corresponding interval\n",
+ " idx = 0\n",
+ "\n",
+ " while idx < (len(scale) - 2) and (v > scale[idx + 1]):\n",
+ " idx += 1\n",
+ "\n",
+ " vv = (v - scale[idx]) / (scale[idx + 1] - scale[idx])\n",
+ "\n",
+ " # get [0,1]-valued color code representing the rgb color corresponding to val\n",
+ " if idx == len(pl_colors) - 1: # Make this work when some values exceed range\n",
+ " val_color01 = colors_01[idx] # color by last color\n",
+ " else:\n",
+ " val_color01 = colors_01[idx] + vv * (colors_01[idx + 1] - colors_01[idx])\n",
+ "\n",
+ " val_color_0255 = (255 * val_color01 + 0.5).astype(int)\n",
+ " return f\"rgb{str(tuple(val_color_0255))}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "range_start = \"2020-08-01T04:00+00\"\n",
+ "range_end = \"2020-08-3T04:00+00\"\n",
+ "\n",
+ "# range_start = \"2020-07-21T12:00+00\"\n",
+ "# range_end = \"2020-07-23T12:00+00\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dir_name = \"maps\"\n",
+ "\n",
+ "c_max = 2200 # np.floor(all.generated_co2_rate_lb_per_mwh_for_electricity.max() + 100)\n",
+ "\n",
+ "hours = all.datetime_utc[\n",
+ " (all.datetime_utc < pd.to_datetime(range_end))\n",
+ " & (all.datetime_utc > pd.to_datetime(range_start))\n",
+ " & (all.BA == \"CISO\")\n",
+ "]\n",
+ "for hour in hours:\n",
+ " print(hour, end=\"...\")\n",
+ " io_toplot = cleaned_io.loc[hour]\n",
+ " toplot = all[all.datetime_utc == hour]\n",
+ " fig = go.Figure()\n",
+ "\n",
+ " toplot.loc[toplot.net_generation_mwh < 1, \"net_generation_mwh\"] = 1\n",
+ " sizes = np.log(toplot.net_generation_mwh) / np.log(1.5)\n",
+ " offset = sizes / 2.5\n",
+ "\n",
+ " colorscale = diverging.RdYlGn_r\n",
+ " # colorscale = cmocean.solar_r\n",
+ "\n",
+ " ### From when\n",
+ " # max_width = io_toplot.max()\n",
+ " # width_factor = 8/max_width\n",
+ " # width_factor = 1/200\n",
+ " for name, val in io_toplot.iteritems():\n",
+ " if val <= 0:\n",
+ " continue\n",
+ " bas = name.split(\".\")[1].split(\"-\")\n",
+ " (ba1, ba2) = bas\n",
+ "\n",
+ " next = False\n",
+ " for ba in bas:\n",
+ " if ba not in ba_coords.index:\n",
+ " next = True\n",
+ " if ba not in toplot.BA.unique():\n",
+ " next = True\n",
+ " if next:\n",
+ " continue\n",
+ "\n",
+ " color = toplot.loc[\n",
+ " toplot.BA == ba1, \"generated_co2_rate_lb_per_mwh_for_electricity\"\n",
+ " ].to_numpy()[0]\n",
+ "\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=ba_coords.loc[bas, \"cx\"],\n",
+ " y=ba_coords.loc[bas, \"cy\"],\n",
+ " opacity=1.0,\n",
+ " mode=\"lines\",\n",
+ " line=dict(\n",
+ " color=get_color_for_val(color, 0, c_max, colorscale), width=2\n",
+ " ),\n",
+ " showlegend=False,\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " ################################# Plot BAs\n",
+ " toplot.loc[toplot.net_generation_mwh < 1, \"net_generation_mwh\"] = 1\n",
+ " sizes = np.log(toplot.net_generation_mwh) / np.log(1.5)\n",
+ " offset = sizes / 1.6\n",
+ "\n",
+ " # Zero-generation BAs: plot under BAs with non-zero gen\n",
+ " zero_gen_bas = (\n",
+ " toplot.net_generation_mwh == 1\n",
+ " ) # (we set to 1 above to make log work)\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=toplot.loc[zero_gen_bas, \"cx\"],\n",
+ " y=toplot.loc[zero_gen_bas, \"cy\"] + (2 / 2.5),\n",
+ " mode=\"markers\",\n",
+ " hoverinfo=\"text\",\n",
+ " text=toplot.loc[zero_gen_bas, \"BA\"],\n",
+ " marker_symbol=\"triangle-up\",\n",
+ " marker=dict(\n",
+ " color=\"lightgrey\",\n",
+ " line=dict(width=1, color=\"DarkSlateGrey\"),\n",
+ " size=7,\n",
+ " opacity=1.0,\n",
+ " sizemode=\"diameter\",\n",
+ " ),\n",
+ " showlegend=False,\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=toplot.cx,\n",
+ " y=toplot.cy,\n",
+ " mode=\"markers\",\n",
+ " hoverinfo=\"text\",\n",
+ " text=toplot.BA,\n",
+ " marker_symbol=\"triangle-up\",\n",
+ " opacity=1.0,\n",
+ " marker=dict(\n",
+ " color=toplot.generated_co2_rate_lb_per_mwh_for_electricity,\n",
+ " size=sizes,\n",
+ " sizemode=\"diameter\",\n",
+ " cmin=0,\n",
+ " cmax=c_max,\n",
+ " opacity=1.0,\n",
+ " line=dict(width=1, color=\"DarkSlateGrey\"),\n",
+ " colorscale=\"rdylgn_r\",\n",
+ " ),\n",
+ " name=\"Generated\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ " )\n",
+ " consumed_toplot = ~toplot.consumed_co2_rate_lb_per_mwh_for_electricity.isna()\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=toplot.loc[consumed_toplot, \"cx\"],\n",
+ " y=toplot.loc[consumed_toplot, \"cy\"] + offset[consumed_toplot],\n",
+ " mode=\"markers\",\n",
+ " hoverinfo=\"text\",\n",
+ " text=toplot.loc[consumed_toplot, \"BA\"],\n",
+ " marker_symbol=\"triangle-down\",\n",
+ " marker=dict(\n",
+ " color=toplot.loc[\n",
+ " consumed_toplot, \"consumed_co2_rate_lb_per_mwh_for_electricity\"\n",
+ " ],\n",
+ " size=sizes[consumed_toplot],\n",
+ " opacity=1.0,\n",
+ " line=dict(width=1, color=\"DarkSlateGrey\"),\n",
+ " sizemode=\"diameter\",\n",
+ " cmin=0,\n",
+ " cmax=c_max,\n",
+ " colorbar=dict(\n",
+ " title=\"Emission rate\\n(lbs/MWh)\",\n",
+ " orientation=\"v\",\n",
+ " len=0.8,\n",
+ " thickness=20,\n",
+ " yanchor=\"bottom\",\n",
+ " y=0,\n",
+ " xpad=20,\n",
+ " ),\n",
+ " colorscale=\"rdylgn_r\",\n",
+ " ),\n",
+ " name=\"Consumed\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " # Legends: don't want colored markers\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=[-10],\n",
+ " y=[-10],\n",
+ " mode=\"markers\",\n",
+ " marker_symbol=\"triangle-up\",\n",
+ " marker=dict(\n",
+ " color=\"white\", line=dict(width=2, color=\"DarkSlateGrey\"), size=10\n",
+ " ),\n",
+ " name=\"Generated\",\n",
+ " )\n",
+ " )\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=[-10],\n",
+ " y=[-10],\n",
+ " mode=\"markers\",\n",
+ " marker_symbol=\"triangle-down\",\n",
+ " marker=dict(\n",
+ " color=\"white\", line=dict(width=2, color=\"DarkSlateGrey\"), size=10\n",
+ " ),\n",
+ " name=\"Consumed\",\n",
+ " )\n",
+ " )\n",
+ " fig.update_yaxes(range=(550, 0)) # autorange=\"reversed\")\n",
+ " fig.update_xaxes(range=(0, 800))\n",
+ "\n",
+ " # Add images\n",
+ " fig.add_layout_image(\n",
+ " dict(\n",
+ " source=Image.open(\"resources/usa.png\"),\n",
+ " xref=\"x\",\n",
+ " yref=\"y\",\n",
+ " x=10,\n",
+ " y=0,\n",
+ " sizex=790,\n",
+ " sizey=550,\n",
+ " sizing=\"stretch\",\n",
+ " opacity=0.5,\n",
+ " layer=\"below\",\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " # Add images\n",
+ " fig.add_layout_image(\n",
+ " dict(\n",
+ " source=Image.open(\"resources/legend_bottom_smaller.png\"),\n",
+ " xref=\"x\",\n",
+ " yref=\"y\",\n",
+ " x=-20,\n",
+ " y=400,\n",
+ " sizex=260,\n",
+ " sizey=200,\n",
+ " sizing=\"contain\",\n",
+ " opacity=1.0,\n",
+ " layer=\"below\",\n",
+ " )\n",
+ " )\n",
+ "\n",
+ " # Set templates\n",
+ " fig.update_layout(\n",
+ " template=\"plotly_white\",\n",
+ " width=800,\n",
+ " height=600,\n",
+ " yaxis_visible=False,\n",
+ " xaxis_visible=False,\n",
+ " title=hour.tz_convert(\"US/Eastern\").strftime(\"%B %-d, %Y - %-I:00 %p ET\"),\n",
+ " )\n",
+ " # fig.show()\n",
+ " os.makedirs(f\"outputs/{dir_name}/\", exist_ok=True)\n",
+ " fig.write_image(f\"outputs/{dir_name}/{hour}.png\", scale=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## Make gif\n",
+ "images = []\n",
+ "files = [f for f in os.listdir(f\"outputs/{dir_name}/\") if \".png\" in f]\n",
+ "files.sort()\n",
+ "for f in files:\n",
+ " images.append(imageio.imread(f\"outputs/{dir_name}/\" + f))\n",
+ "imageio.mimsave(f\"outputs/movie_{dir_name}.gif\", images)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Now just CISO and neighbors.\n",
+ "\n",
+ "# Get list of CISO neighbors\n",
+ "ciso_interchanges = [c for c in cleaned_io.columns if \"CISO\" in c]\n",
+ "ciso_bas = []\n",
+ "for ci in ciso_interchanges:\n",
+ " ba1, ba2 = ci.split(\".\")[1].split(\"-\")\n",
+ " if ba1 not in ciso_bas:\n",
+ " ciso_bas.append(ba1)\n",
+ " if ba2 not in ciso_bas:\n",
+ " ciso_bas.append(ba2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Identify day with max rate difference in CISO\n",
+ "tester = (all[all.BA == \"CISO\"]).copy()\n",
+ "tester[\"difference\"] = (\n",
+ " tester.consumed_co2_rate_lb_per_mwh_for_electricity\n",
+ " - tester.generated_co2_rate_lb_per_mwh_for_electricity\n",
+ ")\n",
+ "tester.difference.abs().max()\n",
+ "tester[tester.difference == tester.difference.abs().max()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hour with max CISO difference generated / consumed\n",
+ "hour = \"2020-09-25 12:00:00+00:00\"\n",
+ "\n",
+ "io_toplot = cleaned_io.loc[hour, ciso_interchanges]\n",
+ "toplot = all[all.datetime_utc == hour]\n",
+ "toplot = toplot[toplot.BA.isin(ciso_bas)]\n",
+ "fig = go.Figure()\n",
+ "\n",
+ "colorscale = diverging.RdYlGn_r\n",
+ "# colorscale = cmocean.solar_r\n",
+ "\n",
+ "c_max = np.floor(toplot.generated_co2_rate_lb_per_mwh_for_electricity.max() + 100)\n",
+ "\n",
+ "### From when\n",
+ "# max_width = io_toplot.max()\n",
+ "# width_factor = 8/max_width\n",
+ "# width_factor = 1/200\n",
+ "for name, val in io_toplot.iteritems():\n",
+ " if val <= 0:\n",
+ " continue\n",
+ " bas = name.split(\".\")[1].split(\"-\")\n",
+ " (ba1, ba2) = bas\n",
+ "\n",
+ " next = False\n",
+ " for ba in bas:\n",
+ " if ba not in ba_coords.index:\n",
+ " next = True\n",
+ " if ba not in toplot.BA.unique():\n",
+ " next = True\n",
+ " if next:\n",
+ " continue\n",
+ "\n",
+ " color = toplot.loc[\n",
+ " toplot.BA == ba1, \"generated_co2_rate_lb_per_mwh_for_electricity\"\n",
+ " ].to_numpy()[0]\n",
+ " print(color)\n",
+ "\n",
+ " fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=ba_coords.loc[bas, \"cx\"],\n",
+ " y=ba_coords.loc[bas, \"cy\"],\n",
+ " opacity=1.0,\n",
+ " mode=\"lines\",\n",
+ " line=dict(color=get_color_for_val(color, 0, c_max, colorscale), width=2),\n",
+ " showlegend=False,\n",
+ " )\n",
+ " )\n",
+ "\n",
+ "################################# Plot BAs\n",
+ "toplot.loc[toplot.net_generation_mwh < 1, \"net_generation_mwh\"] = 1\n",
+ "sizes = np.log(toplot.net_generation_mwh) / np.log(1.5)\n",
+ "offset = sizes / 1.6\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=toplot.cx,\n",
+ " y=toplot.cy - offset,\n",
+ " mode=\"markers\",\n",
+ " hoverinfo=\"text\",\n",
+ " text=toplot.BA,\n",
+ " marker_symbol=\"triangle-up\",\n",
+ " opacity=1.0,\n",
+ " marker=dict(\n",
+ " line=dict(width=1, color=\"DarkSlateGrey\"),\n",
+ " color=toplot.generated_co2_rate_lb_per_mwh_for_electricity,\n",
+ " size=sizes,\n",
+ " sizemode=\"diameter\",\n",
+ " cmin=0,\n",
+ " cmax=c_max,\n",
+ " opacity=1.0,\n",
+ " colorscale=\"rdylgn_r\",\n",
+ " ),\n",
+ " name=\"Generated\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=toplot.cx,\n",
+ " y=toplot.cy,\n",
+ " mode=\"markers\",\n",
+ " hoverinfo=\"text\",\n",
+ " text=toplot.BA,\n",
+ " marker_symbol=\"triangle-down\",\n",
+ " marker=dict(\n",
+ " color=toplot.consumed_co2_rate_lb_per_mwh_for_electricity,\n",
+ " size=sizes,\n",
+ " opacity=1.0,\n",
+ " line=dict(width=1, color=\"DarkSlateGrey\"),\n",
+ " sizemode=\"diameter\",\n",
+ " cmin=0,\n",
+ " cmax=c_max,\n",
+ " colorbar=dict(\n",
+ " title=\"Emission rate\\n(lbs/MWh)\",\n",
+ " orientation=\"v\",\n",
+ " len=0.8,\n",
+ " thickness=20,\n",
+ " yanchor=\"bottom\",\n",
+ " y=0,\n",
+ " xpad=20,\n",
+ " ),\n",
+ " colorscale=\"rdylgn_r\",\n",
+ " ),\n",
+ " name=\"Consumed\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "\n",
+ "# Legends: don't want colored markers\n",
+ "# Legends: don't want colored markers\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=[-10],\n",
+ " y=[-10],\n",
+ " mode=\"markers\",\n",
+ " marker_symbol=\"triangle-up\",\n",
+ " marker=dict(color=\"white\", line=dict(width=2, color=\"DarkSlateGrey\"), size=10),\n",
+ " name=\"Generated\",\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=[-10],\n",
+ " y=[-10],\n",
+ " mode=\"markers\",\n",
+ " marker_symbol=\"triangle-down\",\n",
+ " marker=dict(color=\"white\", line=dict(width=2, color=\"DarkSlateGrey\"), size=10),\n",
+ " name=\"Consumed\",\n",
+ " )\n",
+ ")\n",
+ "fig.update_yaxes(range=(500, 0)) # autorange=\"reversed\")\n",
+ "fig.update_xaxes(range=(0, 200))\n",
+ "\n",
+ "## loop through the labels and add them as annotations\n",
+ "for x in zip(toplot.BA, toplot.cx, toplot.cy):\n",
+ " left_bas = [\"BANC\", \"TIDC\", \"CISO\", \"LDWP\", \"IID\"]\n",
+ " delta = -12 if x[0] in left_bas else 12\n",
+ " fig.add_annotation(\n",
+ " x=x[1] + delta,\n",
+ " y=x[2],\n",
+ " text=x[0],\n",
+ " showarrow=False,\n",
+ " xanchor=(\"right\" if x[0] in left_bas else \"left\"),\n",
+ " )\n",
+ "\n",
+ "# Add images\n",
+ "fig.add_layout_image(\n",
+ " dict(\n",
+ " source=Image.open(\"resources/usa.png\"),\n",
+ " xref=\"x\",\n",
+ " yref=\"y\",\n",
+ " x=10,\n",
+ " y=0,\n",
+ " sizex=790,\n",
+ " sizey=550,\n",
+ " sizing=\"stretch\",\n",
+ " opacity=0.5,\n",
+ " layer=\"below\",\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Add images\n",
+ "fig.add_layout_image(\n",
+ " dict(\n",
+ " source=Image.open(\"resources/legend_bottom_smaller.png\"),\n",
+ " xref=\"x\",\n",
+ " yref=\"y\",\n",
+ " x=-20,\n",
+ " y=360,\n",
+ " sizex=220,\n",
+ " sizey=144,\n",
+ " sizing=\"contain\",\n",
+ " opacity=1.0,\n",
+ " layer=\"below\",\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Set templates\n",
+ "fig.update_layout(\n",
+ " template=\"plotly_white\",\n",
+ " width=400,\n",
+ " height=550,\n",
+ " yaxis_visible=False,\n",
+ " xaxis_visible=False,\n",
+ " title=pd.to_datetime(hour)\n",
+ " .tz_convert(\"US/Pacific\")\n",
+ " .strftime(\"%B %-d, %Y - %-I:00 %p PT\"),\n",
+ ")\n",
+ "fig.show()\n",
+ "fig.write_image(f\"outputs/viz2_legend.png\", scale=3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "toplot[\"difference\"] = (\n",
+ " toplot.generated_co2_rate_lb_per_mwh_for_electricity\n",
+ " - toplot.consumed_co2_rate_lb_per_mwh_for_electricity\n",
+ ") / toplot.generated_co2_rate_lb_per_mwh_for_electricity\n",
+ "toplot"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all[\"difference\"] = (\n",
+ " (\n",
+ " all.generated_co2_rate_lb_per_mwh_for_electricity\n",
+ " - all.consumed_co2_rate_lb_per_mwh_for_electricity\n",
+ " ).abs()\n",
+ ") * all.net_generation_mwh\n",
+ "px.line(all.groupby(\"datetime_utc\").mean()[\"difference\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all[all.datetime_utc == \"12-01-2020 T05:00+00:00\"].to_csv(\"outputs/problem_date.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "### Old code for arrows\n",
+ "# # Arrows have to be added separately\n",
+ "# line_size = val*width_factor\n",
+ "# fig.add_annotation(\n",
+ "# x=ba_coords.loc[ba2,\"cx\"], # arrows' head\n",
+ "# y=ba_coords.loc[ba2,\"cy\"], # arrows' head\n",
+ "# ax=ba_coords.loc[ba1,\"cx\"], # arrows' tail\n",
+ "# ay=ba_coords.loc[ba1,\"cy\"], # arrows' tail\n",
+ "# xref='x',\n",
+ "# yref='y',\n",
+ "# axref='x',\n",
+ "# ayref='y',\n",
+ "# text='', # if you want only the arrow\n",
+ "# showarrow=True,\n",
+ "# arrowhead=1,\n",
+ "# arrowsize=1, #max(.3, line_size),\n",
+ "# arrowwidth=1,\n",
+ "# arrowcolor='royalblue'\n",
+ "# )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.5 ('hourly_egrid')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
},
- "nbformat": 4,
- "nbformat_minor": 2
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.5"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "65c02dfd2dc2ef471c0b5088763a28c1faaa7cad28937ca42fadf51e669fd8e8"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
}
diff --git a/notebooks/visualization/plot_timeseries_data.ipynb b/notebooks/visualization/plot_timeseries_data.ipynb
index baf8e715..0228b61d 100644
--- a/notebooks/visualization/plot_timeseries_data.ipynb
+++ b/notebooks/visualization/plot_timeseries_data.ipynb
@@ -15,7 +15,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"from filepaths import *\n",
"import validation\n",
@@ -41,7 +42,9 @@
"pollutant = \"co2\"\n",
"rate_type = \"for_electricity\"\n",
"\n",
- "data_to_graph = visualization.load_ba_ef_data_to_graph(ba, year, pollutant, rate_type, show_egrid)\n",
+ "data_to_graph = visualization.load_ba_ef_data_to_graph(\n",
+ " ba, year, pollutant, rate_type, show_egrid\n",
+ ")\n",
"data_to_graph"
]
},
@@ -77,7 +80,7 @@
" labels={\"value\": \"lbCO2e/MWh\"},\n",
" template=\"plotly_white\",\n",
" title=ba,\n",
- ")\n"
+ ")"
]
},
{
@@ -117,7 +120,7 @@
")\n",
"\n",
"\n",
- "fig.show()\n"
+ "fig.show()"
]
},
{
@@ -145,7 +148,9 @@
"ba = \"CISO\"\n",
"year = 2020\n",
"\n",
- "power_sector_data = pd.read_csv(results_folder(f\"{year}/power_sector_data/hourly/us_units/{ba}.csv\"))"
+ "power_sector_data = pd.read_csv(\n",
+ " results_folder(f\"{year}/power_sector_data/hourly/us_units/{ba}.csv\")\n",
+ ")"
]
},
{
@@ -160,7 +165,7 @@
" column_name=\"net_generation_mwh\",\n",
" fuel_category_name=\"fuel_category\",\n",
" plot_type=\"area\",\n",
- ")\n"
+ ")"
]
},
{
@@ -175,7 +180,7 @@
" column_name=\"co2_mass_lb\",\n",
" fuel_category_name=\"fuel_category\",\n",
" plot_type=\"area\",\n",
- ")\n"
+ ")"
]
}
],
diff --git a/notebooks/work_in_progress/GH102_test_dask.ipynb b/notebooks/work_in_progress/GH102_test_dask.ipynb
index db4d08ad..5d0553aa 100644
--- a/notebooks/work_in_progress/GH102_test_dask.ipynb
+++ b/notebooks/work_in_progress/GH102_test_dask.ipynb
@@ -28,8 +28,11 @@
"outputs": [],
"source": [
"# Start client so can see worker mem use\n",
- "from dask.distributed import Client \n",
- "client = Client(n_workers=2, threads_per_worker=2, memory_limit='1GB') # limit worker size to create issues even on --small dataset\n",
+ "from dask.distributed import Client\n",
+ "\n",
+ "client = Client(\n",
+ " n_workers=2, threads_per_worker=2, memory_limit=\"1GB\"\n",
+ ") # limit worker size to create issues even on --small dataset\n",
"client"
]
},
@@ -45,7 +48,7 @@
"# # Tell python where to look for modules.\n",
"import sys\n",
"\n",
- "sys.path.append('../../open-grid-emissions/')\n",
+ "sys.path.append(\"../../open-grid-emissions/\")\n",
"\n",
"# import local modules\n",
"import src.data_cleaning as data_cleaning"
@@ -57,12 +60,21 @@
"metadata": {},
"outputs": [],
"source": [
- "o_shaped_eia_data = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/small/shaped_eia923_data_2020.csv', parse_dates=['datetime_utc'])\n",
- "o_partial_cems_scaled = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/small/partial_cems_scaled_2020.csv', parse_dates=['datetime_utc']) #NOT FINAL VERSION \n",
- "o_cems = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/small/cems_2020.csv', parse_dates=['datetime_utc']) # NOT FINAL VERSION\n",
+ "o_shaped_eia_data = pd.read_csv(\n",
+ " PATH_TO_LOCAL_REPO + \"data/outputs/small/shaped_eia923_data_2020.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")\n",
+ "o_partial_cems_scaled = pd.read_csv(\n",
+ " PATH_TO_LOCAL_REPO + \"data/outputs/small/partial_cems_scaled_2020.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ") # NOT FINAL VERSION\n",
+ "o_cems = pd.read_csv(\n",
+ " PATH_TO_LOCAL_REPO + \"data/outputs/small/cems_2020.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ") # NOT FINAL VERSION\n",
"\n",
"# shaped_eia_data = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/shaped_eia923_data_2020.csv')\n",
- "# partial_cems_scaled = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/partial_cems_scaled_2020.csv') #NOT FINAL VERSION \n",
+ "# partial_cems_scaled = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/partial_cems_scaled_2020.csv') #NOT FINAL VERSION\n",
"# cems = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/outputs/cems_2020.csv') # NOT FINAL VERSION"
]
},
@@ -72,10 +84,12 @@
"metadata": {},
"outputs": [],
"source": [
- "# most of the stuff done to partial_cems_scaled and cems in data_pipeline is in plant_static_attributes: \n",
- "#plant_static_attributes = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/results/plant_data/plant_static_attributes.csv')\n",
+ "# most of the stuff done to partial_cems_scaled and cems in data_pipeline is in plant_static_attributes:\n",
+ "# plant_static_attributes = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/results/plant_data/plant_static_attributes.csv')\n",
"\n",
- "plant_static_attributes = pd.read_csv(PATH_TO_LOCAL_REPO + 'data/results/small/plant_data/plant_static_attributes.csv')"
+ "plant_static_attributes = pd.read_csv(\n",
+ " PATH_TO_LOCAL_REPO + \"data/results/small/plant_data/plant_static_attributes.csv\"\n",
+ ")"
]
},
{
@@ -84,8 +98,10 @@
"metadata": {},
"outputs": [],
"source": [
- "o_partial_cems_scaled = o_partial_cems_scaled.merge(plant_static_attributes, how='left', on='plant_id_eia')\n",
- "o_cems = o_cems.merge(plant_static_attributes, how='left', on='plant_id_eia')"
+ "o_partial_cems_scaled = o_partial_cems_scaled.merge(\n",
+ " plant_static_attributes, how=\"left\", on=\"plant_id_eia\"\n",
+ ")\n",
+ "o_cems = o_cems.merge(plant_static_attributes, how=\"left\", on=\"plant_id_eia\")"
]
},
{
@@ -98,7 +114,7 @@
"# On full dataset, these break with worker size 16GB (ie Gailin's laptop)\n",
"\n",
"combined_plant_data = data_cleaning.combine_subplant_data(\n",
- " o_cems, o_partial_cems_scaled, o_shaped_eia_data\n",
+ " o_cems, o_partial_cems_scaled, o_shaped_eia_data\n",
")\n",
"\n",
"# 12. Aggregate CEMS data to BA-fuel and write power sector results\n",
@@ -113,8 +129,7 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
- "#plants = combined_plant_data.groupby('plant_id_eia').sum().compute()"
+ "# plants = combined_plant_data.groupby('plant_id_eia').sum().compute()"
]
},
{
@@ -139,7 +154,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#dt_shaped['datetime_utc'] = dt_shaped.datetime_utc.dt.to_datetime64()\n"
+ "# dt_shaped['datetime_utc'] = dt_shaped.datetime_utc.dt.to_datetime64()\n"
]
},
{
@@ -148,7 +163,7 @@
"metadata": {},
"outputs": [],
"source": [
- "dt_shaped = o_shaped_eia_data.set_index('datetime_utc')\n",
+ "dt_shaped = o_shaped_eia_data.set_index(\"datetime_utc\")\n",
"shaped_eia_dask = dd.from_pandas(dt_shaped, npartitions=50)"
]
},
@@ -167,7 +182,7 @@
"metadata": {},
"outputs": [],
"source": [
- "smol = shaped_eia_dask[['net_generation_mwh','fuel_consumed_mmbtu']]"
+ "smol = shaped_eia_dask[[\"net_generation_mwh\", \"fuel_consumed_mmbtu\"]]"
]
},
{
@@ -176,8 +191,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# even this breaks. If can get this to work, maybe that solution works for rest? \n",
- "smol.groupby('datetime_utc').sum().compute()"
+ "# even this breaks. If can get this to work, maybe that solution works for rest?\n",
+ "smol.groupby(\"datetime_utc\").sum().compute()"
]
},
{
diff --git a/notebooks/work_in_progress/GH153_fill_missing_nox_so2_cems.ipynb b/notebooks/work_in_progress/GH153_fill_missing_nox_so2_cems.ipynb
index d9c1c94b..1196e6a6 100644
--- a/notebooks/work_in_progress/GH153_fill_missing_nox_so2_cems.ipynb
+++ b/notebooks/work_in_progress/GH153_fill_missing_nox_so2_cems.ipynb
@@ -122,7 +122,9 @@
"outputs": [],
"source": [
"epa_eia_crosswalk = load_data.load_epa_eia_crosswalk(year)\n",
- "boiler_to_unit_crosswalk = epa_eia_crosswalk[[\"plant_id_eia\",\"emissions_unit_id_epa\",\"boiler_id\"]].drop_duplicates()\n",
+ "boiler_to_unit_crosswalk = epa_eia_crosswalk[\n",
+ " [\"plant_id_eia\", \"emissions_unit_id_epa\", \"boiler_id\"]\n",
+ "].drop_duplicates()\n",
"boiler_to_unit_crosswalk"
]
},
@@ -175,8 +177,7 @@
" \"boiler_bottom_type\",\n",
" \"boiler_firing_type\",\n",
" ]\n",
- ")\n",
- "\n"
+ ")"
]
},
{
@@ -201,7 +202,9 @@
"outputs": [],
"source": [
"epa_eia_crosswalk = load_data.load_epa_eia_crosswalk(year)\n",
- "boiler_to_unit_crosswalk = epa_eia_crosswalk[[\"plant_id_eia\",\"emissions_unit_id_epa\",\"boiler_id\"]].drop_duplicates()"
+ "boiler_to_unit_crosswalk = epa_eia_crosswalk[\n",
+ " [\"plant_id_eia\", \"emissions_unit_id_epa\", \"boiler_id\"]\n",
+ "].drop_duplicates()"
]
},
{
@@ -211,13 +214,31 @@
"outputs": [],
"source": [
"# merge in all possible fuel-specific emission factors for the pm boiler design parameters\n",
- "boiler_factors = boiler_firing_type.merge(nox_emission_factors, how=\"left\", on=[\"prime_mover_code\",\"boiler_bottom_type\",\"boiler_firing_type\"])\n",
+ "boiler_factors = boiler_firing_type.merge(\n",
+ " nox_emission_factors,\n",
+ " how=\"left\",\n",
+ " on=[\"prime_mover_code\", \"boiler_bottom_type\", \"boiler_firing_type\"],\n",
+ ")\n",
"\n",
"# associate a unit\n",
- "boiler_factors = boiler_factors.merge(boiler_to_unit_crosswalk, how=\"left\", on=[\"plant_id_eia\",\"boiler_id\"])\n",
+ "boiler_factors = boiler_factors.merge(\n",
+ " boiler_to_unit_crosswalk, how=\"left\", on=[\"plant_id_eia\", \"boiler_id\"]\n",
+ ")\n",
"\n",
"# aggregate by unit fuel\n",
- "boiler_factors = boiler_factors.groupby([\"plant_id_eia\",\"emissions_unit_id_epa\",\"energy_source_code\",\"emission_factor_denominator\"], dropna=False)[\"emission_factor\"].mean().reset_index()\n",
+ "boiler_factors = (\n",
+ " boiler_factors.groupby(\n",
+ " [\n",
+ " \"plant_id_eia\",\n",
+ " \"emissions_unit_id_epa\",\n",
+ " \"energy_source_code\",\n",
+ " \"emission_factor_denominator\",\n",
+ " ],\n",
+ " dropna=False,\n",
+ " )[\"emission_factor\"]\n",
+ " .mean()\n",
+ " .reset_index()\n",
+ ")\n",
"\n",
"boiler_factors"
]
diff --git a/notebooks/work_in_progress/GH240_eia930_physics_reconciliation.ipynb b/notebooks/work_in_progress/GH240_eia930_physics_reconciliation.ipynb
index b67df60b..92cc342c 100644
--- a/notebooks/work_in_progress/GH240_eia930_physics_reconciliation.ipynb
+++ b/notebooks/work_in_progress/GH240_eia930_physics_reconciliation.ipynb
@@ -33,7 +33,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import download_data\n",
"import load_data\n",
@@ -64,7 +65,18 @@
"eia930_raw = eia930.load_chalendar_for_pipeline(raw_930_file, year=year)\n",
"eia930_data = eia930.load_chalendar_for_pipeline(clean_930_file, year=year)\n",
"\n",
- "eia930_merged = eia930_raw.merge(eia930_data, how=\"left\", on=[\"ba_code\",\"fuel_category_eia930\",\"datetime_utc\",\"datetime_local\",\"report_date\"], suffixes=(\"_raw\",\"_cleaned\"))"
+ "eia930_merged = eia930_raw.merge(\n",
+ " eia930_data,\n",
+ " how=\"left\",\n",
+ " on=[\n",
+ " \"ba_code\",\n",
+ " \"fuel_category_eia930\",\n",
+ " \"datetime_utc\",\n",
+ " \"datetime_local\",\n",
+ " \"report_date\",\n",
+ " ],\n",
+ " suffixes=(\"_raw\", \"_cleaned\"),\n",
+ ")"
]
},
{
@@ -74,10 +86,18 @@
"outputs": [],
"source": [
"# calculate how well correlated the raw and cleaned data is\n",
- "correlations = eia930_merged.groupby([\"ba_code\",\"fuel_category_eia930\",\"report_date\"], dropna=False)[[\"net_generation_mwh_930_raw\",\"net_generation_mwh_930_cleaned\"]].corr().reset_index()\n",
+ "correlations = (\n",
+ " eia930_merged.groupby(\n",
+ " [\"ba_code\", \"fuel_category_eia930\", \"report_date\"], dropna=False\n",
+ " )[[\"net_generation_mwh_930_raw\", \"net_generation_mwh_930_cleaned\"]]\n",
+ " .corr()\n",
+ " .reset_index()\n",
+ ")\n",
"correlations = correlations[correlations[\"level_3\"] == \"net_generation_mwh_930_raw\"]\n",
- "correlations = correlations.drop(columns=[\"level_3\",\"net_generation_mwh_930_raw\"])\n",
- "correlations = correlations.rename(columns={\"net_generation_mwh_930_cleaned\":\"correlation_with_raw\"})\n",
+ "correlations = correlations.drop(columns=[\"level_3\", \"net_generation_mwh_930_raw\"])\n",
+ "correlations = correlations.rename(\n",
+ " columns={\"net_generation_mwh_930_cleaned\": \"correlation_with_raw\"}\n",
+ ")\n",
"correlations = correlations[correlations[\"report_date\"].dt.year == 2020]\n",
"correlations"
]
@@ -91,7 +111,9 @@
"ba = \"PJM\"\n",
"fuel = \"coal\"\n",
"\n",
- "correlations[(correlations[\"ba_code\"] == ba) & (correlations[\"fuel_category_eia930\"] == fuel)]"
+ "correlations[\n",
+ " (correlations[\"ba_code\"] == ba) & (correlations[\"fuel_category_eia930\"] == fuel)\n",
+ "]"
]
},
{
@@ -112,9 +134,15 @@
"ba = \"BPAT\"\n",
"fuel = \"nuclear\"\n",
"\n",
- "data_to_plot = eia930_merged[(eia930_merged[\"ba_code\"] == ba) & (eia930_merged[\"fuel_category_eia930\"] == fuel)]\n",
+ "data_to_plot = eia930_merged[\n",
+ " (eia930_merged[\"ba_code\"] == ba) & (eia930_merged[\"fuel_category_eia930\"] == fuel)\n",
+ "]\n",
"\n",
- "px.line(data_to_plot, x=\"datetime_local\", y=[\"net_generation_mwh_930_raw\",\"net_generation_mwh_930_cleaned\"])"
+ "px.line(\n",
+ " data_to_plot,\n",
+ " x=\"datetime_local\",\n",
+ " y=[\"net_generation_mwh_930_raw\", \"net_generation_mwh_930_cleaned\"],\n",
+ ")"
]
}
],
diff --git a/notebooks/work_in_progress/clean_cems_outliers.ipynb b/notebooks/work_in_progress/clean_cems_outliers.ipynb
index 9df519cf..5c4a565d 100644
--- a/notebooks/work_in_progress/clean_cems_outliers.ipynb
+++ b/notebooks/work_in_progress/clean_cems_outliers.ipynb
@@ -59,7 +59,9 @@
"metadata": {},
"outputs": [],
"source": [
- "cems = pd.read_csv(f\"{outputs_folder()}/{year}/cems_cleaned_{year}.csv\", dtype=get_dtypes())"
+ "cems = pd.read_csv(\n",
+ " f\"{outputs_folder()}/{year}/cems_cleaned_{year}.csv\", dtype=get_dtypes()\n",
+ ")"
]
},
{
@@ -68,8 +70,13 @@
"metadata": {},
"outputs": [],
"source": [
- "# example CEMS data \n",
- "px.line(cems[cems.plant_id_eia==3], x=\"datetime_utc\", y=\"net_generation_mwh\", color=\"subplant_id\")"
+ "# example CEMS data\n",
+ "px.line(\n",
+ " cems[cems.plant_id_eia == 3],\n",
+ " x=\"datetime_utc\",\n",
+ " y=\"net_generation_mwh\",\n",
+ " color=\"subplant_id\",\n",
+ ")"
]
},
{
@@ -108,12 +115,12 @@
"metadata": {},
"outputs": [],
"source": [
- "# Table is unique by plant ID, generator ID. \n",
+ "# Table is unique by plant ID, generator ID.\n",
"# For each plant and generator, find the maximum of the three capacity values (summer, winter, nameplate)\n",
"gens = gens_pudl.copy(deep=True)\n",
"gens[\"net_capacity_mw\"] = gens.winter_capacity_mw.combine(gens.summer_capacity_mw, max)\n",
"gens[\"net_capacity_mw\"] = gens.net_capacity_mw.combine(gens.capacity_mw, max)\n",
- "gens = gens.loc[:,[\"plant_id_eia\",\"generator_id\",\"net_capacity_mw\"]]"
+ "gens = gens.loc[:, [\"plant_id_eia\", \"generator_id\", \"net_capacity_mw\"]]"
]
},
{
@@ -132,7 +139,9 @@
"metadata": {},
"outputs": [],
"source": [
- "gens_unit = gens.merge(subplant_crosswalk, how='left', on=['plant_id_eia','generator_id'])\n",
+ "gens_unit = gens.merge(\n",
+ " subplant_crosswalk, how=\"left\", on=[\"plant_id_eia\", \"generator_id\"]\n",
+ ")\n",
"print(f\"Setting {sum(gens_unit.subplant_id.isna())} NaN subplants to 1 in 860 data\")\n",
"gens_unit.loc[gens_unit.subplant_id.isna()] = 1"
]
@@ -143,7 +152,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Group gens by subplant \n",
+ "# Group gens by subplant\n",
"gens_per_sub = gens_unit.groupby([\"plant_id_eia\", \"subplant_id\"]).sum().reset_index()"
]
},
@@ -172,8 +181,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# Now that nans are gone, we can switch from the weird pandas int dtype to numpy dtype, which is required for merge \n",
- "cems = cems.astype(dtype={\"subplant_id\":np.int32})"
+ "# Now that nans are gone, we can switch from the weird pandas int dtype to numpy dtype, which is required for merge\n",
+ "cems = cems.astype(dtype={\"subplant_id\": np.int32})"
]
},
{
@@ -182,7 +191,11 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_cap = cems.merge(gens_per_sub[[\"plant_id_eia\", \"subplant_id\", \"net_capacity_mw\"]], how='left', on=[\"plant_id_eia\", \"subplant_id\"])"
+ "cems_cap = cems.merge(\n",
+ " gens_per_sub[[\"plant_id_eia\", \"subplant_id\", \"net_capacity_mw\"]],\n",
+ " how=\"left\",\n",
+ " on=[\"plant_id_eia\", \"subplant_id\"],\n",
+ ")"
]
},
{
@@ -191,10 +204,37 @@
"metadata": {},
"outputs": [],
"source": [
- "# TODO: here we're assuming that all columns are bad if net gen is bad, and that all bad rows have bad net gen. \n",
- "dat_cols = ['gross_generation_mwh', 'steam_load_1000_lb', 'fuel_consumed_mmbtu', 'co2_mass_lb', 'nox_mass_lb', 'so2_mass_lb', 'plant_id_epa', 'co2_mass_measurement_code', 'nox_mass_measurement_code', 'so2_mass_measurement_code', 'report_date', 'energy_source_code', 'ch4_mass_lb', 'n2o_mass_lb', 'fuel_consumed_for_electricity_mmbtu', 'co2_mass_lb_for_electricity', 'ch4_mass_lb_for_electricity', 'n2o_mass_lb_for_electricity', 'nox_mass_lb_for_electricity', 'so2_mass_lb_for_electricity', 'co2_mass_lb_adjusted', 'ch4_mass_lb_adjusted', 'n2o_mass_lb_adjusted', 'nox_mass_lb_adjusted', 'so2_mass_lb_adjusted','net_generation_mwh']\n",
+ "# TODO: here we're assuming that all columns are bad if net gen is bad, and that all bad rows have bad net gen.\n",
+ "dat_cols = [\n",
+ " \"gross_generation_mwh\",\n",
+ " \"steam_load_1000_lb\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"nox_mass_lb\",\n",
+ " \"so2_mass_lb\",\n",
+ " \"plant_id_epa\",\n",
+ " \"co2_mass_measurement_code\",\n",
+ " \"nox_mass_measurement_code\",\n",
+ " \"so2_mass_measurement_code\",\n",
+ " \"report_date\",\n",
+ " \"energy_source_code\",\n",
+ " \"ch4_mass_lb\",\n",
+ " \"n2o_mass_lb\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"ch4_mass_lb_for_electricity\",\n",
+ " \"n2o_mass_lb_for_electricity\",\n",
+ " \"nox_mass_lb_for_electricity\",\n",
+ " \"so2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"ch4_mass_lb_adjusted\",\n",
+ " \"n2o_mass_lb_adjusted\",\n",
+ " \"nox_mass_lb_adjusted\",\n",
+ " \"so2_mass_lb_adjusted\",\n",
+ " \"net_generation_mwh\",\n",
+ "]\n",
"bad = cems_cap.net_generation_mwh > cems_cap.net_capacity_mw\n",
- "cems_cap.loc[bad,dat_cols] = np.nan"
+ "cems_cap.loc[bad, dat_cols] = np.nan"
]
},
{
@@ -212,8 +252,8 @@
"metadata": {},
"outputs": [],
"source": [
- "## What proportion of CEMS data was ID'ed as bad using capacity filter? \n",
- "sum(bad)/len(cems)"
+ "## What proportion of CEMS data was ID'ed as bad using capacity filter?\n",
+ "sum(bad) / len(cems)"
]
},
{
@@ -224,9 +264,19 @@
"source": [
"plant = 2410\n",
"\n",
- "print(gens_unit.loc[gens_unit.plant_id_eia==plant,[\"plant_id_eia\", \"subplant_id\", \"generator_id\",\"net_capacity_mw\"]])\n",
+ "print(\n",
+ " gens_unit.loc[\n",
+ " gens_unit.plant_id_eia == plant,\n",
+ " [\"plant_id_eia\", \"subplant_id\", \"generator_id\", \"net_capacity_mw\"],\n",
+ " ]\n",
+ ")\n",
"\n",
- "px.line(cems_cap[cems_cap.plant_id_eia==plant], x=\"datetime_utc\", y=\"net_generation_mwh\", color=\"subplant_id\")"
+ "px.line(\n",
+ " cems_cap[cems_cap.plant_id_eia == plant],\n",
+ " x=\"datetime_utc\",\n",
+ " y=\"net_generation_mwh\",\n",
+ " color=\"subplant_id\",\n",
+ ")"
]
},
{
@@ -235,9 +285,20 @@
"metadata": {},
"outputs": [],
"source": [
- "print(gens_unit.loc[gens_unit.plant_id_eia==plant,[\"plant_id_eia\", \"subplant_id\", \"generator_id\",\"net_capacity_mw\"]])\n",
+ "print(\n",
+ " gens_unit.loc[\n",
+ " gens_unit.plant_id_eia == plant,\n",
+ " [\"plant_id_eia\", \"subplant_id\", \"generator_id\", \"net_capacity_mw\"],\n",
+ " ]\n",
+ ")\n",
"\n",
- "px.line(cems[cems.plant_id_eia==plant], x=\"datetime_utc\", y=\"net_generation_mwh\", color=\"subplant_id\", title=f\"plant id = {plant}\")"
+ "px.line(\n",
+ " cems[cems.plant_id_eia == plant],\n",
+ " x=\"datetime_utc\",\n",
+ " y=\"net_generation_mwh\",\n",
+ " color=\"subplant_id\",\n",
+ " title=f\"plant id = {plant}\",\n",
+ ")"
]
},
{
@@ -256,9 +317,41 @@
"metadata": {},
"outputs": [],
"source": [
- "# Get per-plant, per-variable median and IQR \n",
- "numeric_cols = ['gross_generation_mwh', 'steam_load_1000_lb', 'fuel_consumed_mmbtu', 'co2_mass_lb', 'ch4_mass_lb', 'n2o_mass_lb', 'nox_mass_lb', 'so2_mass_lb', 'co2_mass_lb_adjusted', 'ch4_mass_lb_adjusted', 'n2o_mass_lb_adjusted', 'nox_mass_lb_adjusted', 'so2_mass_lb_adjusted', 'net_generation_mwh', 'fuel_consumed_for_electricity_mmbtu', 'co2_mass_lb_for_electricity', 'co2_mass_lb_for_electricity_adjusted', 'ch4_mass_lb_for_electricity', 'ch4_mass_lb_for_electricity_adjusted', 'n2o_mass_lb_for_electricity', 'n2o_mass_lb_for_electricity_adjusted', 'nox_mass_lb_for_electricity', 'nox_mass_lb_for_electricity_adjusted', 'so2_mass_lb_for_electricity', 'so2_mass_lb_for_electricity_adjusted', 'co2e_mass_lb', 'co2e_mass_lb_adjusted', 'co2e_mass_lb_for_electricity', 'co2e_mass_lb_for_electricity_adjusted']\n",
- "iqr = cems.groupby([\"plant_id_eia\", \"subplant_id\"])[numeric_cols].quantile(.75) - cems.groupby([\"plant_id_eia\",\"subplant_id\"])[numeric_cols].quantile(.25)"
+ "# Get per-plant, per-variable median and IQR\n",
+ "numeric_cols = [\n",
+ " \"gross_generation_mwh\",\n",
+ " \"steam_load_1000_lb\",\n",
+ " \"fuel_consumed_mmbtu\",\n",
+ " \"co2_mass_lb\",\n",
+ " \"ch4_mass_lb\",\n",
+ " \"n2o_mass_lb\",\n",
+ " \"nox_mass_lb\",\n",
+ " \"so2_mass_lb\",\n",
+ " \"co2_mass_lb_adjusted\",\n",
+ " \"ch4_mass_lb_adjusted\",\n",
+ " \"n2o_mass_lb_adjusted\",\n",
+ " \"nox_mass_lb_adjusted\",\n",
+ " \"so2_mass_lb_adjusted\",\n",
+ " \"net_generation_mwh\",\n",
+ " \"fuel_consumed_for_electricity_mmbtu\",\n",
+ " \"co2_mass_lb_for_electricity\",\n",
+ " \"co2_mass_lb_for_electricity_adjusted\",\n",
+ " \"ch4_mass_lb_for_electricity\",\n",
+ " \"ch4_mass_lb_for_electricity_adjusted\",\n",
+ " \"n2o_mass_lb_for_electricity\",\n",
+ " \"n2o_mass_lb_for_electricity_adjusted\",\n",
+ " \"nox_mass_lb_for_electricity\",\n",
+ " \"nox_mass_lb_for_electricity_adjusted\",\n",
+ " \"so2_mass_lb_for_electricity\",\n",
+ " \"so2_mass_lb_for_electricity_adjusted\",\n",
+ " \"co2e_mass_lb\",\n",
+ " \"co2e_mass_lb_adjusted\",\n",
+ " \"co2e_mass_lb_for_electricity\",\n",
+ " \"co2e_mass_lb_for_electricity_adjusted\",\n",
+ "]\n",
+ "iqr = cems.groupby([\"plant_id_eia\", \"subplant_id\"])[numeric_cols].quantile(\n",
+ " 0.75\n",
+ ") - cems.groupby([\"plant_id_eia\", \"subplant_id\"])[numeric_cols].quantile(0.25)"
]
},
{
@@ -267,7 +360,7 @@
"metadata": {},
"outputs": [],
"source": [
- "median = cems.groupby([\"plant_id_eia\",\"subplant_id\"]).median()"
+ "median = cems.groupby([\"plant_id_eia\", \"subplant_id\"]).median()"
]
},
{
@@ -287,8 +380,7 @@
"outputs": [],
"source": [
"cems_filtered = cems.copy()\n",
- "#for plant in cems_filtered.plant_id_eia.unique():\n",
- " "
+ "# for plant in cems_filtered.plant_id_eia.unique():"
]
},
{
@@ -297,7 +389,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_filtered = cems_filtered.set_index([\"plant_id_eia\",\"subplant_id\"])"
+ "cems_filtered = cems_filtered.set_index([\"plant_id_eia\", \"subplant_id\"])"
]
},
{
@@ -306,7 +398,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems_filtered.loc[(3,1)]"
+ "cems_filtered.loc[(3, 1)]"
]
},
{
@@ -315,7 +407,9 @@
"metadata": {},
"outputs": [],
"source": [
- "checked = (cems_filtered.loc[(3,1), lower_bound.columns] < lower_bound.loc[(3,1)]) | (cems_filtered.loc[(3,1), lower_bound.columns] > upper_bound.loc[(3, 1)])"
+ "checked = (cems_filtered.loc[(3, 1), lower_bound.columns] < lower_bound.loc[(3, 1)]) | (\n",
+ " cems_filtered.loc[(3, 1), lower_bound.columns] > upper_bound.loc[(3, 1)]\n",
+ ")"
]
},
{
@@ -333,8 +427,13 @@
"metadata": {},
"outputs": [],
"source": [
- "toplot = cems_filtered.loc[(3,1)]\n",
- "px.scatter(toplot, x=\"datetime_utc\", y=\"fuel_consumed_mmbtu\", color=checked[\"fuel_consumed_mmbtu\"])"
+ "toplot = cems_filtered.loc[(3, 1)]\n",
+ "px.scatter(\n",
+ " toplot,\n",
+ " x=\"datetime_utc\",\n",
+ " y=\"fuel_consumed_mmbtu\",\n",
+ " color=checked[\"fuel_consumed_mmbtu\"],\n",
+ ")"
]
},
{
@@ -343,7 +442,7 @@
"metadata": {},
"outputs": [],
"source": [
- "median.loc[(3,1)]"
+ "median.loc[(3, 1)]"
]
}
],
diff --git a/notebooks/work_in_progress/issue230_spikes.ipynb b/notebooks/work_in_progress/issue230_spikes.ipynb
index 5a3468ed..7a98e8e2 100644
--- a/notebooks/work_in_progress/issue230_spikes.ipynb
+++ b/notebooks/work_in_progress/issue230_spikes.ipynb
@@ -24,7 +24,7 @@
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd \n",
+ "import pandas as pd\n",
"import plotly.express as px"
]
},
@@ -39,11 +39,12 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../src/')\n",
+ "\n",
+ "sys.path.append(\"../../src/\")\n",
"\n",
"import impute_hourly_profiles\n",
"from filepaths import outputs_folder, results_folder\n",
- "from column_checks import get_dtypes\n"
+ "from column_checks import get_dtypes"
]
},
{
@@ -52,7 +53,9 @@
"metadata": {},
"outputs": [],
"source": [
- "problem_profiles = pd.read_csv(f\"{outputs_folder()}/2020/hourly_profiles_2020.csv\", dtype=get_dtypes())"
+ "problem_profiles = pd.read_csv(\n",
+ " f\"{outputs_folder()}/2020/hourly_profiles_2020.csv\", dtype=get_dtypes()\n",
+ ")"
]
},
{
@@ -61,7 +64,9 @@
"metadata": {},
"outputs": [],
"source": [
- "problem_profiles = problem_profiles[(problem_profiles.ba_code==\"PJM\") & (problem_profiles.fuel_category==\"nuclear\")]"
+ "problem_profiles = problem_profiles[\n",
+ " (problem_profiles.ba_code == \"PJM\") & (problem_profiles.fuel_category == \"nuclear\")\n",
+ "]"
]
},
{
@@ -72,7 +77,16 @@
"source": [
"problem_profiles.head()\n",
"\n",
- "px.line(problem_profiles, x=\"datetime_utc\", y=[\"residual_profile\",\"scaled_residual_profile\",\"shifted_residual_profile\",\"profile\"])"
+ "px.line(\n",
+ " problem_profiles,\n",
+ " x=\"datetime_utc\",\n",
+ " y=[\n",
+ " \"residual_profile\",\n",
+ " \"scaled_residual_profile\",\n",
+ " \"shifted_residual_profile\",\n",
+ " \"profile\",\n",
+ " ],\n",
+ ")"
]
},
{
@@ -81,7 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
- "problem_profiles.profile_method.unique()\n"
+ "problem_profiles.profile_method.unique()"
]
},
{
@@ -90,7 +104,10 @@
"metadata": {},
"outputs": [],
"source": [
- "problem_profiles[(problem_profiles.datetime_utc > \"2020-04-15\") & (problem_profiles.datetime_utc < \"2020-04-16\")]"
+ "problem_profiles[\n",
+ " (problem_profiles.datetime_utc > \"2020-04-15\")\n",
+ " & (problem_profiles.datetime_utc < \"2020-04-16\")\n",
+ "]"
]
},
{
@@ -99,8 +116,10 @@
"metadata": {},
"outputs": [],
"source": [
- "### Look for the issue in the next output \n",
- "shaped = pd.read_csv(f\"{outputs_folder()}/2020/shaped_eia923_data_2020.csv\", dtype=get_dtypes())"
+ "### Look for the issue in the next output\n",
+ "shaped = pd.read_csv(\n",
+ " f\"{outputs_folder()}/2020/shaped_eia923_data_2020.csv\", dtype=get_dtypes()\n",
+ ")"
]
},
{
@@ -109,7 +128,7 @@
"metadata": {},
"outputs": [],
"source": [
- "shaped = shaped[(shaped.ba_code==\"PJM\")]"
+ "shaped = shaped[(shaped.ba_code == \"PJM\")]"
]
},
{
@@ -118,7 +137,13 @@
"metadata": {},
"outputs": [],
"source": [
- "px.line(shaped, x=\"datetime_utc\", y=\"net_generation_mwh\", line_group=\"fuel_category\", color=\"fuel_category\")"
+ "px.line(\n",
+ " shaped,\n",
+ " x=\"datetime_utc\",\n",
+ " y=\"net_generation_mwh\",\n",
+ " line_group=\"fuel_category\",\n",
+ " color=\"fuel_category\",\n",
+ ")"
]
},
{
@@ -127,8 +152,10 @@
"metadata": {},
"outputs": [],
"source": [
- "#### Ok, issue is not in 923 shaped data, so it must just be in CEMS data \n",
- "plant_level = pd.read_csv(f\"{results_folder()}/2020/plant_data/hourly/us_units/individual_plant_data.csv\")"
+ "#### Ok, issue is not in 923 shaped data, so it must just be in CEMS data\n",
+ "plant_level = pd.read_csv(\n",
+ " f\"{results_folder()}/2020/plant_data/hourly/us_units/individual_plant_data.csv\"\n",
+ ")"
]
},
{
diff --git a/notebooks/work_in_progress/sandbox.ipynb b/notebooks/work_in_progress/sandbox.ipynb
index b69f7db2..37714709 100644
--- a/notebooks/work_in_progress/sandbox.ipynb
+++ b/notebooks/work_in_progress/sandbox.ipynb
@@ -17,7 +17,8 @@
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
- "sys.path.append('../../../open-grid-emissions/src/')\n",
+ "\n",
+ "sys.path.append(\"../../../open-grid-emissions/src/\")\n",
"\n",
"import download_data\n",
"import load_data\n",
@@ -31,6 +32,7 @@
"import gross_to_net_generation\n",
"import eia930\n",
"from logging_util import get_logger, configure_root_logger\n",
+ "\n",
"configure_root_logger()\n",
"logger = get_logger(\"test\")\n",
"\n",
diff --git a/notebooks/work_in_progress/uncertainty_analysis.ipynb b/notebooks/work_in_progress/uncertainty_analysis.ipynb
index e327b28a..79f519d4 100644
--- a/notebooks/work_in_progress/uncertainty_analysis.ipynb
+++ b/notebooks/work_in_progress/uncertainty_analysis.ipynb
@@ -18,7 +18,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Calculate min and max possible at each hour \n",
+ "# Calculate min and max possible at each hour\n",
"import pandas as pd\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
@@ -34,10 +34,11 @@
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
- "# Tell python where to look for modules. \n",
+ "# Tell python where to look for modules.\n",
"# Depending on how your jupyter handles working directories, this may not be needed.\n",
"import sys\n",
- "sys.path.append('../../hourly-egrid/')\n",
+ "\n",
+ "sys.path.append(\"../../hourly-egrid/\")\n",
"\n",
"import src.load_data as load_data\n",
"import src.column_checks as column_checks"
@@ -49,7 +50,7 @@
"metadata": {},
"outputs": [],
"source": [
- "year = 2020 "
+ "year = 2020"
]
},
{
@@ -74,8 +75,14 @@
"metadata": {},
"outputs": [],
"source": [
- "flat = pd.read_csv(f\"../data/results/flat/{year}/power_sector_data/hourly/us_units/{ba}.csv\", parse_dates=[\"datetime_utc\"])\n",
- "base = pd.read_csv(f\"../data/results/{year}/power_sector_data/hourly/us_units/{ba}.csv\", parse_dates=[\"datetime_utc\"])\n"
+ "flat = pd.read_csv(\n",
+ " f\"../data/results/flat/{year}/power_sector_data/hourly/us_units/{ba}.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")\n",
+ "base = pd.read_csv(\n",
+ " f\"../data/results/{year}/power_sector_data/hourly/us_units/{ba}.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")"
]
},
{
@@ -84,8 +91,8 @@
"metadata": {},
"outputs": [],
"source": [
- "flat = flat[flat.fuel_category==\"total\"]\n",
- "base = base[base.fuel_category==\"total\"]"
+ "flat = flat[flat.fuel_category == \"total\"]\n",
+ "base = base[base.fuel_category == \"total\"]"
]
},
{
@@ -94,7 +101,7 @@
"metadata": {},
"outputs": [],
"source": [
- "both = flat.merge(base, how='left', on='datetime_utc', suffixes=(\"_flat\", \"_base\"))"
+ "both = flat.merge(base, how=\"left\", on=\"datetime_utc\", suffixes=(\"_flat\", \"_base\"))"
]
},
{
@@ -112,10 +119,18 @@
"metadata": {},
"outputs": [],
"source": [
- "fig = px.line(both, x=\"datetime_utc\", y=['generated_co2e_rate_lb_per_mwh_for_electricity_adjusted_flat','generated_co2e_rate_lb_per_mwh_for_electricity_adjusted_base'], title=f\"{ba}, carbon intensity using flat vs. base generation\")\n",
+ "fig = px.line(\n",
+ " both,\n",
+ " x=\"datetime_utc\",\n",
+ " y=[\n",
+ " \"generated_co2e_rate_lb_per_mwh_for_electricity_adjusted_flat\",\n",
+ " \"generated_co2e_rate_lb_per_mwh_for_electricity_adjusted_base\",\n",
+ " ],\n",
+ " title=f\"{ba}, carbon intensity using flat vs. base generation\",\n",
+ ")\n",
"fig.show()\n",
- "#newnames = {'generated_co2_rate_lb_per_mwh_adjusted': 'Our data', 'generated_co2_rate_lb_per_mwh_for_electricity_adjusted': 'Real-time data'}\n",
- "#fig.for_each_trace(lambda t: t.update(name = newnames[t.name]))"
+ "# newnames = {'generated_co2_rate_lb_per_mwh_adjusted': 'Our data', 'generated_co2_rate_lb_per_mwh_for_electricity_adjusted': 'Real-time data'}\n",
+ "# fig.for_each_trace(lambda t: t.update(name = newnames[t.name]))"
]
},
{
@@ -131,7 +146,10 @@
"metadata": {},
"outputs": [],
"source": [
- "cems = pd.read_csv(\"../data/results/2020/plant_data/hourly/us_units/individual_plant_data.csv\", parse_dates=[\"datetime_utc\", \"report_date\"])\n",
+ "cems = pd.read_csv(\n",
+ " \"../data/results/2020/plant_data/hourly/us_units/individual_plant_data.csv\",\n",
+ " parse_dates=[\"datetime_utc\", \"report_date\"],\n",
+ ")\n",
"plant_meta = pd.read_csv(\"../data/results/2020/plant_data/plant_static_attributes.csv\")"
]
},
@@ -141,7 +159,7 @@
"metadata": {},
"outputs": [],
"source": [
- "cems = cems.merge(plant_meta, how='left', on='plant_id_eia')"
+ "cems = cems.merge(plant_meta, how=\"left\", on=\"plant_id_eia\")"
]
},
{
@@ -178,8 +196,12 @@
"metadata": {},
"outputs": [],
"source": [
- "caps = pudl_reader.gens_eia860()[[\"plant_id_eia\", \"capacity_mw\", \"summer_capacity_mw\", \"winter_capacity_mw\"]].copy()\n",
- "caps[\"max_capacity\"] = caps[[\"capacity_mw\", \"summer_capacity_mw\", \"winter_capacity_mw\"]].max(axis=1)\n",
+ "caps = pudl_reader.gens_eia860()[\n",
+ " [\"plant_id_eia\", \"capacity_mw\", \"summer_capacity_mw\", \"winter_capacity_mw\"]\n",
+ "].copy()\n",
+ "caps[\"max_capacity\"] = caps[\n",
+ " [\"capacity_mw\", \"summer_capacity_mw\", \"winter_capacity_mw\"]\n",
+ "].max(axis=1)\n",
"caps = caps.groupby(\"plant_id_eia\").sum()[\"max_capacity\"]"
]
},
@@ -189,7 +211,10 @@
"metadata": {},
"outputs": [],
"source": [
- "monthly_rates = pd.read_csv(\"../data/results/2020/plant_data/monthly/us_units/plant_data.csv\", parse_dates=[\"report_date\"])"
+ "monthly_rates = pd.read_csv(\n",
+ " \"../data/results/2020/plant_data/monthly/us_units/plant_data.csv\",\n",
+ " parse_dates=[\"report_date\"],\n",
+ ")"
]
},
{
@@ -198,8 +223,10 @@
"metadata": {},
"outputs": [],
"source": [
- "monthly_rates = monthly_rates.merge(caps, how='left', left_on='plant_id_eia', right_index=True)\n",
- "monthly_rates = monthly_rates.merge(plant_meta, how='left', on='plant_id_eia')"
+ "monthly_rates = monthly_rates.merge(\n",
+ " caps, how=\"left\", left_on=\"plant_id_eia\", right_index=True\n",
+ ")\n",
+ "monthly_rates = monthly_rates.merge(plant_meta, how=\"left\", on=\"plant_id_eia\")"
]
},
{
@@ -208,12 +235,15 @@
"metadata": {},
"outputs": [],
"source": [
- "# Calculate per-plant, per-month maximum net generation and emissions: \n",
+ "# Calculate per-plant, per-month maximum net generation and emissions:\n",
"# max net gen = capacity\n",
"# max emissions = capacity * monthly rate\n",
"pols = [\"co2\", \"ch4\", \"n2o\", \"co2e\", \"nox\", \"so2\"]\n",
"for pol in pols:\n",
- " monthly_rates[f\"max_hourly_{pol}\"] = monthly_rates[\"max_capacity\"] * (monthly_rates[f\"{pol}_mass_lb_for_electricity_adjusted\"]/monthly_rates[\"net_generation_mwh\"])"
+ " monthly_rates[f\"max_hourly_{pol}\"] = monthly_rates[\"max_capacity\"] * (\n",
+ " monthly_rates[f\"{pol}_mass_lb_for_electricity_adjusted\"]\n",
+ " / monthly_rates[\"net_generation_mwh\"]\n",
+ " )"
]
},
{
@@ -224,10 +254,17 @@
"source": [
"# Filter for non-CEMS plants\n",
"# Filter for plants with non-zero generation in this month: we know that hourly generation shouldn't include the capacity of these plants\n",
- "# Group by BA and month to get monthly max hourly emissions \n",
+ "# Group by BA and month to get monthly max hourly emissions\n",
"pol_cols = [f\"max_hourly_{p}\" for p in pols] + [\"max_capacity\"]\n",
- "filtered_monthly_rates = monthly_rates[(monthly_rates.data_availability == \"eia_only\") & (monthly_rates.net_generation_mwh > 0)]\n",
- "non_cems_maxes = filtered_monthly_rates.groupby([\"report_date\", \"ba_code\"])[pol_cols].sum().reset_index()"
+ "filtered_monthly_rates = monthly_rates[\n",
+ " (monthly_rates.data_availability == \"eia_only\")\n",
+ " & (monthly_rates.net_generation_mwh > 0)\n",
+ "]\n",
+ "non_cems_maxes = (\n",
+ " filtered_monthly_rates.groupby([\"report_date\", \"ba_code\"])[pol_cols]\n",
+ " .sum()\n",
+ " .reset_index()\n",
+ ")"
]
},
{
@@ -236,13 +273,16 @@
"metadata": {},
"outputs": [],
"source": [
- "# Identify plants with negative net generation. \n",
+ "# Identify plants with negative net generation.\n",
"# These will be included in net generation minimum, emissions maximum\n",
- "negative_monthly = monthly_rates[(monthly_rates.data_availability == \"eia_only\") & (monthly_rates.net_generation_mwh < 0)]\n",
+ "negative_monthly = monthly_rates[\n",
+ " (monthly_rates.data_availability == \"eia_only\")\n",
+ " & (monthly_rates.net_generation_mwh < 0)\n",
+ "]\n",
"assert len(negative_monthly) == 0\n",
"\n",
"\n",
- "# ########### FAKE NEGATIVE PLANT FOR TESTING \n",
+ "# ########### FAKE NEGATIVE PLANT FOR TESTING\n",
"# negative_monthly = monthly_rates[(monthly_rates.plant_id_eia == 3)].copy()\n",
"# negative_monthly.loc[:,\"net_generation_mwh\"] = -300"
]
@@ -264,8 +304,12 @@
"outputs": [],
"source": [
"# Add report date back in (dropped during groupby)\n",
- "report_dates = cems.groupby([\"datetime_utc\",\"ba_code\"])[\"report_date\"].first().reset_index()\n",
- "minimum_bound = minimum_bound.merge(report_dates, how='left', on=[\"datetime_utc\",\"ba_code\"])"
+ "report_dates = (\n",
+ " cems.groupby([\"datetime_utc\", \"ba_code\"])[\"report_date\"].first().reset_index()\n",
+ ")\n",
+ "minimum_bound = minimum_bound.merge(\n",
+ " report_dates, how=\"left\", on=[\"datetime_utc\", \"ba_code\"]\n",
+ ")"
]
},
{
@@ -274,8 +318,10 @@
"metadata": {},
"outputs": [],
"source": [
- "# Calculate maximum by merging hourly max emissions and generation into minimum, then adding \n",
- "maximum_bound = minimum_bound.merge(non_cems_maxes, how='left', on=[\"report_date\", \"ba_code\"])"
+ "# Calculate maximum by merging hourly max emissions and generation into minimum, then adding\n",
+ "maximum_bound = minimum_bound.merge(\n",
+ " non_cems_maxes, how=\"left\", on=[\"report_date\", \"ba_code\"]\n",
+ ")"
]
},
{
@@ -285,8 +331,13 @@
"outputs": [],
"source": [
"for pol in pols:\n",
- " maximum_bound[f\"{pol}_mass_lb_for_electricity_adjusted\"] = maximum_bound[f\"{pol}_mass_lb_for_electricity_adjusted\"] + maximum_bound[f\"max_hourly_{pol}\"]\n",
- "maximum_bound[\"net_generation_mwh\"] = maximum_bound[\"net_generation_mwh\"] + maximum_bound[\"max_capacity\"]\n",
+ " maximum_bound[f\"{pol}_mass_lb_for_electricity_adjusted\"] = (\n",
+ " maximum_bound[f\"{pol}_mass_lb_for_electricity_adjusted\"]\n",
+ " + maximum_bound[f\"max_hourly_{pol}\"]\n",
+ " )\n",
+ "maximum_bound[\"net_generation_mwh\"] = (\n",
+ " maximum_bound[\"net_generation_mwh\"] + maximum_bound[\"max_capacity\"]\n",
+ ")\n",
"maximum_bound = maximum_bound[cols + [\"datetime_utc\", \"ba_code\"]]"
]
},
@@ -312,8 +363,14 @@
"metadata": {},
"outputs": [],
"source": [
- "flat = pd.read_csv(f\"../data/results/flat/2020/power_sector_data/hourly/us_units/{ba}.csv\", parse_dates=[\"datetime_utc\"])\n",
- "base = pd.read_csv(f\"../data/results/2020/power_sector_data/hourly/us_units/{ba}.csv\", parse_dates=[\"datetime_utc\"])"
+ "flat = pd.read_csv(\n",
+ " f\"../data/results/flat/2020/power_sector_data/hourly/us_units/{ba}.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")\n",
+ "base = pd.read_csv(\n",
+ " f\"../data/results/2020/power_sector_data/hourly/us_units/{ba}.csv\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ ")"
]
},
{
@@ -322,7 +379,7 @@
"metadata": {},
"outputs": [],
"source": [
- "# Only take totals \n",
+ "# Only take totals\n",
"flat = flat[flat.fuel_category == \"total\"]\n",
"base = base[base.fuel_category == \"total\"]"
]
@@ -333,30 +390,50 @@
"metadata": {},
"outputs": [],
"source": [
- "this_max = maximum_bound[maximum_bound.ba_code==ba]\n",
- "this_min = minimum_bound[minimum_bound.ba_code==ba]\n",
+ "this_max = maximum_bound[maximum_bound.ba_code == ba]\n",
+ "this_min = minimum_bound[minimum_bound.ba_code == ba]\n",
"\n",
"fig = go.Figure()\n",
- "fig.add_trace(go.Scatter(x=this_min.datetime_utc, y=this_min.net_generation_mwh,\n",
- " fill=None,\n",
- " mode='lines',\n",
- " line_color='indigo', showlegend=False\n",
- " ))\n",
- "fig.add_trace(go.Scatter(\n",
- " x=this_max.datetime_utc,\n",
- " y=this_max.net_generation_mwh,\n",
- " fill='tonexty', # fill area between trace0 and trace1\n",
- " mode='lines', line_color='indigo', name=\"Min/max possible\"))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=this_min.datetime_utc,\n",
+ " y=this_min.net_generation_mwh,\n",
+ " fill=None,\n",
+ " mode=\"lines\",\n",
+ " line_color=\"indigo\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=this_max.datetime_utc,\n",
+ " y=this_max.net_generation_mwh,\n",
+ " fill=\"tonexty\", # fill area between trace0 and trace1\n",
+ " mode=\"lines\",\n",
+ " line_color=\"indigo\",\n",
+ " name=\"Min/max possible\",\n",
+ " )\n",
+ ")\n",
"\n",
- "fig.add_trace(go.Scatter(\n",
- " x=flat.datetime_utc,\n",
- " y=flat.net_generation_mwh,\n",
- " mode='lines', line_color='brown', name=\"Flat\"))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=flat.datetime_utc,\n",
+ " y=flat.net_generation_mwh,\n",
+ " mode=\"lines\",\n",
+ " line_color=\"brown\",\n",
+ " name=\"Flat\",\n",
+ " )\n",
+ ")\n",
"\n",
- "fig.add_trace(go.Scatter(\n",
- " x=base.datetime_utc,\n",
- " y=base.net_generation_mwh,\n",
- " mode='lines', line_color='blue', name=\"Best guess\"))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=base.datetime_utc,\n",
+ " y=base.net_generation_mwh,\n",
+ " mode=\"lines\",\n",
+ " line_color=\"blue\",\n",
+ " name=\"Best guess\",\n",
+ " )\n",
+ ")\n",
"\n",
"fig.update_layout(\n",
" title=f\"{ba} total net generation estimates\",\n",
@@ -365,7 +442,7 @@
" legend_title=\"Estimate type\",\n",
")\n",
"\n",
- "fig.show()\n"
+ "fig.show()"
]
},
{
@@ -374,30 +451,50 @@
"metadata": {},
"outputs": [],
"source": [
- "this_max = maximum_bound[maximum_bound.ba_code==ba]\n",
- "this_min = minimum_bound[minimum_bound.ba_code==ba]\n",
+ "this_max = maximum_bound[maximum_bound.ba_code == ba]\n",
+ "this_min = minimum_bound[minimum_bound.ba_code == ba]\n",
"\n",
"fig = go.Figure()\n",
- "fig.add_trace(go.Scatter(x=this_min.datetime_utc, y=this_min.co2_mass_lb_for_electricity_adjusted,\n",
- " fill=None,\n",
- " mode='lines',\n",
- " line_color='indigo', showlegend=False\n",
- " ))\n",
- "fig.add_trace(go.Scatter(\n",
- " x=this_max.datetime_utc,\n",
- " y=this_max.co2_mass_lb_for_electricity_adjusted,\n",
- " fill='tonexty', # fill area between trace0 and trace1\n",
- " mode='lines', line_color='indigo', name=\"Min/max possible\"))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=this_min.datetime_utc,\n",
+ " y=this_min.co2_mass_lb_for_electricity_adjusted,\n",
+ " fill=None,\n",
+ " mode=\"lines\",\n",
+ " line_color=\"indigo\",\n",
+ " showlegend=False,\n",
+ " )\n",
+ ")\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=this_max.datetime_utc,\n",
+ " y=this_max.co2_mass_lb_for_electricity_adjusted,\n",
+ " fill=\"tonexty\", # fill area between trace0 and trace1\n",
+ " mode=\"lines\",\n",
+ " line_color=\"indigo\",\n",
+ " name=\"Min/max possible\",\n",
+ " )\n",
+ ")\n",
"\n",
- "fig.add_trace(go.Scatter(\n",
- " x=flat.datetime_utc,\n",
- " y=flat.co2_mass_lb_for_electricity_adjusted,\n",
- " mode='lines', line_color='brown', name=\"Flat\"))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=flat.datetime_utc,\n",
+ " y=flat.co2_mass_lb_for_electricity_adjusted,\n",
+ " mode=\"lines\",\n",
+ " line_color=\"brown\",\n",
+ " name=\"Flat\",\n",
+ " )\n",
+ ")\n",
"\n",
- "fig.add_trace(go.Scatter(\n",
- " x=base.datetime_utc,\n",
- " y=base.co2_mass_lb_for_electricity_adjusted,\n",
- " mode='lines', line_color='blue', name=\"Best guess\"))\n",
+ "fig.add_trace(\n",
+ " go.Scatter(\n",
+ " x=base.datetime_utc,\n",
+ " y=base.co2_mass_lb_for_electricity_adjusted,\n",
+ " mode=\"lines\",\n",
+ " line_color=\"blue\",\n",
+ " name=\"Best guess\",\n",
+ " )\n",
+ ")\n",
"\n",
"fig.update_layout(\n",
" title=f\"{ba} total carbon emissions\",\n",
@@ -406,7 +503,7 @@
" legend_title=\"Estimate type\",\n",
")\n",
"\n",
- "fig.show()\n"
+ "fig.show()"
]
},
{
@@ -427,23 +524,51 @@
"\n",
"issues = []\n",
"for ba in os.listdir(\"../data/results/2020/power_sector_data/hourly/us_units/\"):\n",
- " if \".DS_Store\" in ba: \n",
+ " if \".DS_Store\" in ba:\n",
" continue\n",
- " our_guess = pd.read_csv(f\"../data/results/2020/power_sector_data/hourly/us_units/{ba}\", parse_dates=[\"datetime_utc\"])\n",
+ " our_guess = pd.read_csv(\n",
+ " f\"../data/results/2020/power_sector_data/hourly/us_units/{ba}\",\n",
+ " parse_dates=[\"datetime_utc\"],\n",
+ " )\n",
" our_guess = our_guess[our_guess.fuel_category == \"total\"]\n",
" ba = ba.replace(\".csv\", \"\")\n",
- " to_compare = our_guess[[\"datetime_utc\", \"co2_mass_lb_for_electricity_adjusted\", \"net_generation_mwh\"]].merge(minimum_bound.loc[minimum_bound.ba_code==ba,[\"datetime_utc\", \"co2_mass_lb_for_electricity_adjusted\", \"net_generation_mwh\"]],\n",
- " how='left',\n",
- " on='datetime_utc',\n",
- " suffixes=(\"\",\"_min\")\n",
+ " to_compare = our_guess[\n",
+ " [\"datetime_utc\", \"co2_mass_lb_for_electricity_adjusted\", \"net_generation_mwh\"]\n",
+ " ].merge(\n",
+ " minimum_bound.loc[\n",
+ " minimum_bound.ba_code == ba,\n",
+ " [\n",
+ " \"datetime_utc\",\n",
+ " \"co2_mass_lb_for_electricity_adjusted\",\n",
+ " \"net_generation_mwh\",\n",
+ " ],\n",
+ " ],\n",
+ " how=\"left\",\n",
+ " on=\"datetime_utc\",\n",
+ " suffixes=(\"\", \"_min\"),\n",
" )\n",
- " to_compare = to_compare.merge(maximum_bound.loc[maximum_bound.ba_code==ba,[\"datetime_utc\", \"co2_mass_lb_for_electricity_adjusted\", \"net_generation_mwh\"]],\n",
- " how='left',\n",
- " on='datetime_utc',\n",
- " suffixes=(\"_best\",\"_max\")\n",
+ " to_compare = to_compare.merge(\n",
+ " maximum_bound.loc[\n",
+ " maximum_bound.ba_code == ba,\n",
+ " [\n",
+ " \"datetime_utc\",\n",
+ " \"co2_mass_lb_for_electricity_adjusted\",\n",
+ " \"net_generation_mwh\",\n",
+ " ],\n",
+ " ],\n",
+ " how=\"left\",\n",
+ " on=\"datetime_utc\",\n",
+ " suffixes=(\"_best\", \"_max\"),\n",
" )\n",
- " to_compare[\"ok\"] = (to_compare[f\"{col_to_check}_min\"] > to_compare[f\"{col_to_check}_best\"]) | (to_compare[f\"{col_to_check}_best\"] > to_compare[f\"{col_to_check}_max\"])\n",
- " issues.append(to_compare[[\"datetime_utc\", \"ok\"]].set_index(\"datetime_utc\").squeeze().rename(ba))\n"
+ " to_compare[\"ok\"] = (\n",
+ " to_compare[f\"{col_to_check}_min\"] > to_compare[f\"{col_to_check}_best\"]\n",
+ " ) | (to_compare[f\"{col_to_check}_best\"] > to_compare[f\"{col_to_check}_max\"])\n",
+ " issues.append(\n",
+ " to_compare[[\"datetime_utc\", \"ok\"]]\n",
+ " .set_index(\"datetime_utc\")\n",
+ " .squeeze()\n",
+ " .rename(ba)\n",
+ " )"
]
},
{
@@ -470,7 +595,7 @@
"metadata": {},
"outputs": [],
"source": [
- "monthly_rates[monthly_rates.ba_code==\"DEAA\"]"
+ "monthly_rates[monthly_rates.ba_code == \"DEAA\"]"
]
},
{
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..dfe594eb
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,68 @@
+[project]
+name = "open-grid-emissions"
+requires-python = ">=3.10"
+readme = "README.md"
+
+[tool.ruff]
+# Exclude a variety of commonly ignored directories.
+exclude = [
+ ".git",
+ ".github",
+ ".pytest_cache",
+ ".ruff_cache",
+ ".tox",
+ ".venv",
+ "__pypackages__",
+ "_build",
+ "build",
+ "venv",
+]
+
+# All paths are relative to the project root, which is the directory containing the pyproject.toml.
+src = ["src"]
+
+# Same as Black.
+line-length = 88
+indent-width = 4
+
+# Assume Python 3.10
+target-version = "py310"
+
+# Built-in for Jupyter
+extend-include = ["*.ipynb"]
+
+[tool.ruff.lint]
+# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
+# Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or
+# McCabe complexity (`C901`) by default.
+select = ["E4", "E7", "E9", "F"]
+ignore = []
+
+# Allow fix for all enabled rules (when `--fix`) is provided.
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+# Exclude notebooks
+exclude = ["*.ipynb"]
+
+[tool.ruff.lint.flake8-quotes]
+docstring-quotes = "double"
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
diff --git a/src/__init__.py b/src/__init__.py
index efd6d259..ef0055b3 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -2,4 +2,5 @@
import logging
from .logging_util import configure_root_logger
from .filepaths import outputs_folder
+
configure_root_logger(outputs_folder("logfile.txt"), logging.INFO)
diff --git a/src/consumed.py b/src/consumed.py
index 5ff7cce8..ba75b609 100644
--- a/src/consumed.py
+++ b/src/consumed.py
@@ -439,12 +439,12 @@ def _load_rates(self):
)
# Cut off emissions at 9 hours after UTC year
- emissions = emissions[:f"{self.year+1}-01-01 09:00:00+00:00"]
- rates[((adj, pol))] = emissions
+ emissions = emissions[: f"{self.year+1}-01-01 09:00:00+00:00"]
+ rates[(adj, pol)] = emissions
# Make generation data frame
generation = pd.DataFrame(data=gens)
- generation = generation[:f"{self.year+1}-01-01 09:00:00+00:00"]
+ generation = generation[: f"{self.year+1}-01-01 09:00:00+00:00"]
return rates, generation
@@ -462,7 +462,7 @@ def build_matrices(self, pol: str, adj: str, date):
# Build generation array, using 930 for import-only regions
G = np.zeros(len(self.regions))
- for (i, r) in enumerate(self.regions):
+ for i, r in enumerate(self.regions):
if r in self.import_regions:
G[i] = self.eia930.df.loc[date, KEYS["E"]["NG"] % r]
else:
@@ -513,7 +513,7 @@ def run(self):
consumed_emissions = np.full(len(self.regions), np.nan)
# Export
- for (i, r) in enumerate(self.regions):
+ for i, r in enumerate(self.regions):
self.results[r].loc[date, col] = consumed_emissions[i]
if total_failed > 0:
logger.warning(
diff --git a/src/data_cleaning.py b/src/data_cleaning.py
index e5c70a14..c2bf1d24 100644
--- a/src/data_cleaning.py
+++ b/src/data_cleaning.py
@@ -1,7 +1,5 @@
import pandas as pd
import numpy as np
-import os
-import sqlalchemy as sa
import pudl.analysis.allocate_gen_fuel as allocate_gen_fuel
import pudl.analysis.epacamd_eia as epacamd_eia
@@ -12,7 +10,7 @@
import emissions
from emissions import CLEAN_FUELS
from column_checks import get_dtypes, apply_dtypes
-from filepaths import manual_folder, outputs_folder, downloads_folder
+from filepaths import manual_folder, outputs_folder
from logging_util import get_logger
logger = get_logger(__name__)
@@ -88,9 +86,7 @@ def generate_subplant_ids(start_year, end_year, cems_ids):
filtered_crosswalk = epacamd_eia.filter_crosswalk(crosswalk, cems_ids)
# use graph analysis to identify subplants
- crosswalk_with_subplant_ids = make_subplant_ids(
- filtered_crosswalk
- )
+ crosswalk_with_subplant_ids = make_subplant_ids(filtered_crosswalk)
# change the eia plant id to int
crosswalk_with_subplant_ids["plant_id_eia"] = crosswalk_with_subplant_ids[
@@ -462,9 +458,7 @@ def clean_eia923(
"fuel_consumed_mmbtu",
"fuel_consumed_for_electricity_mmbtu",
],
- ].round(
- 1
- )
+ ].round(1)
validation.test_for_missing_energy_source_code(gen_fuel_allocated)
validation.test_for_negative_values(gen_fuel_allocated)
@@ -637,9 +631,7 @@ def create_primary_fuel_table(gen_fuel_allocated, add_subplant_id, year):
ascending=True,
).drop_duplicates(
subset=["plant_id_eia", "subplant_id", "generator_id"], keep="last"
- )[
- ["plant_id_eia", "subplant_id", "generator_id", "energy_source_code"]
- ]
+ )[["plant_id_eia", "subplant_id", "generator_id", "energy_source_code"]]
if not add_subplant_id:
gen_primary_fuel = gen_primary_fuel.drop(columns=["subplant_id"])
diff --git a/src/eia930.py b/src/eia930.py
index a4892f64..2e49a15a 100644
--- a/src/eia930.py
+++ b/src/eia930.py
@@ -12,7 +12,7 @@
# Tell gridemissions where to find config before we load gridemissions
os.environ["GRIDEMISSIONS_CONFIG_FILE_PATH"] = top_folder("config/gridemissions.json")
-from gridemissions.workflows import make_dataset
+from gridemissions.workflows import make_dataset  # noqa: E402
logger = get_logger(__name__)
@@ -459,9 +459,7 @@ def manual_930_adjust(raw: pd.DataFrame):
& (raw.index < "2022-06-16 07:00:00+00")
),
cols,
- ].shift(
- 1, freq="H"
- )
+ ].shift(1, freq="H")
raw = raw.drop(columns=cols)
raw = pd.concat([raw, new], axis="columns")
diff --git a/src/gross_to_net_generation.py b/src/gross_to_net_generation.py
index ad5aae58..f1cd861e 100644
--- a/src/gross_to_net_generation.py
+++ b/src/gross_to_net_generation.py
@@ -2,7 +2,6 @@
import os
import pandas as pd
import statsmodels.formula.api as smf
-import sqlalchemy as sa
import warnings
# import pudl packages
@@ -13,7 +12,7 @@
import data_cleaning
import validation
from column_checks import get_dtypes
-from filepaths import outputs_folder, downloads_folder
+from filepaths import outputs_folder
from logging_util import get_logger
logger = get_logger(__name__)
diff --git a/src/impute_hourly_profiles.py b/src/impute_hourly_profiles.py
index d796b013..102192cd 100644
--- a/src/impute_hourly_profiles.py
+++ b/src/impute_hourly_profiles.py
@@ -425,9 +425,7 @@ def calculate_residual(
~combined_data["eia930_profile"].isna(), "cems_profile"
] = combined_data.loc[
~combined_data["eia930_profile"].isna(), "cems_profile"
- ].fillna(
- 0
- )
+ ].fillna(0)
combined_data = calculate_scaled_residual(combined_data)
combined_data = calculate_shifted_residual(combined_data)
diff --git a/src/load_data.py b/src/load_data.py
index b1f9725e..43f6ab63 100644
--- a/src/load_data.py
+++ b/src/load_data.py
@@ -1,6 +1,5 @@
import pandas as pd
import numpy as np
-import os
import sqlalchemy as sa
import warnings
from pathlib import Path
diff --git a/src/output_data.py b/src/output_data.py
index f8137731..4591e8bb 100644
--- a/src/output_data.py
+++ b/src/output_data.py
@@ -462,7 +462,7 @@ def write_power_sector_results(ba_fuel_data, path_prefix, skip_outputs):
if not skip_outputs:
for ba in list(ba_fuel_data.ba_code.unique()):
- if type(ba) is not str:
+ if not isinstance(ba, str):
logger.warning(
f"not aggregating {sum(ba_fuel_data.ba_code.isna())} plants with numeric BA {ba}"
)
diff --git a/src/validation.py b/src/validation.py
index 2302762c..7c9f9016 100644
--- a/src/validation.py
+++ b/src/validation.py
@@ -220,7 +220,7 @@ def check_for_orphaned_cc_part_in_subplant(subplant_crosswalk):
"prime_mover_code"
].agg(["unique"])
cc_subplants["unique_cc_pms"] = [
- ",".join(map(str, l)) for l in cc_subplants["unique"]
+ ",".join(map(str, L)) for L in cc_subplants["unique"]
]
cc_subplants = cc_subplants.drop(columns="unique")
# identify where there are subplants that only contain a single CC part
@@ -340,7 +340,7 @@ def check_missing_or_zero_generation_matches(combined_gen_data):
# identify when there is zero or NA gross generation associated with positive net generation
missing_gross_gen = combined_gen_data[
(combined_gen_data["net_generation_mwh"] > 0)
- & ((combined_gen_data["gross_generation_mwh"] == 0))
+ & (combined_gen_data["gross_generation_mwh"] == 0)
]
# identify when there is zero or NA net generation associated with nonzero gross generation
@@ -620,9 +620,7 @@ def ensure_non_overlapping_data_from_all_sources(
["in_eia", "in_cems", "in_partial_cems_subplant", "in_partial_cems_plant"]
] = data_overlap[
["in_eia", "in_cems", "in_partial_cems_subplant", "in_partial_cems_plant"]
- ].fillna(
- 0
- )
+ ].fillna(0)
data_overlap["number_of_locations"] = (
data_overlap["in_eia"]
+ data_overlap["in_cems"]
@@ -1257,9 +1255,7 @@ def summarize_cems_measurement_quality(cems):
"so2_mass_measurement_code",
"nox_mass_measurement_code",
]
- ].astype(
- str
- )
+ ].astype(str)
# replace the CEMS mass measurement codes with two categories
measurement_code_map = {
"Measured": "Measured",
@@ -1282,9 +1278,7 @@ def summarize_cems_measurement_quality(cems):
"so2_mass_measurement_code",
"nox_mass_measurement_code",
]
- ].replace(
- measurement_code_map
- )
+ ].replace(measurement_code_map)
cems_quality_summary = []
# calculate the percent of mass for each pollutant that is measured or imputed
@@ -1797,16 +1791,12 @@ def load_egrid_plant_file(year):
] = egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS),
"co2_mass_lb_for_electricity_adjusted",
- ].fillna(
- 0
- )
+ ].fillna(0)
egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS), "co2_mass_lb"
] = egrid_plant.loc[
egrid_plant["plant_primary_fuel"].isin(CLEAN_FUELS), "co2_mass_lb"
- ].fillna(
- 0
- )
+ ].fillna(0)
# reorder the columns
egrid_plant = egrid_plant[
diff --git a/test/__init__.py b/test/__init__.py
index 3ccc99fe..af248428 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -1,3 +1,4 @@
# Tell path where our code is
import sys
-sys.path.append('../src')
+
+sys.path.append("../src")
diff --git a/test/test_eia.py b/test/test_eia.py
index 21642077..0203722a 100644
--- a/test/test_eia.py
+++ b/test/test_eia.py
@@ -7,6 +7,7 @@
from src.eia import EIA
+
# File open mocking modeled on https://stackoverflow.com/questions/1289894/how-do-i-mock-an-open-used-in-a-with-statement-using-the-mock-framework-in-pyth
@pytest.fixture
@patch(
diff --git a/test/test_logging.py b/test/test_logging.py
index f905175f..c97b82d1 100644
--- a/test/test_logging.py
+++ b/test/test_logging.py
@@ -3,31 +3,30 @@
import pandas as pd
-sys.path.append('../src')
-sys.path.append('..')
+sys.path.append("../src")
+sys.path.append("..")
-import src.eia930 as eia930
-from src.filepaths import top_folder
+from src.filepaths import top_folder  # noqa: E402
-from src.logging_util import get_logger, configure_root_logger
+from src.logging_util import get_logger, configure_root_logger  # noqa: E402
pudl_logger = logging.getLogger(name="catalystcoop.pudl")
-configure_root_logger(logfile=top_folder('test/test_logfile.txt'), level=logging.INFO)
+configure_root_logger(logfile=top_folder("test/test_logfile.txt"), level=logging.INFO)
# If you call this again, nothing bad should happen. Logging statements should
# still only show up once.
-configure_root_logger(logfile=top_folder('test/test_logfile.txt'), level=logging.INFO)
-logger = get_logger('test')
+configure_root_logger(logfile=top_folder("test/test_logfile.txt"), level=logging.INFO)
+logger = get_logger("test")
def main():
- """These statements should each be printed once in a nice format."""
- logger.info('This is the OGE logger')
- pudl_logger.info('This is the PUDL logger')
+ """These statements should each be printed once in a nice format."""
+ logger.info("This is the OGE logger")
+ pudl_logger.info("This is the PUDL logger")
- df = pd.DataFrame({"a": [1,2,3], "b": [4,5,6]})
- logger.info("\n" + df.to_string())
+ df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+ logger.info("\n" + df.to_string())
-if __name__ == '__main__':
- main()
+if __name__ == "__main__":
+ main()