Fixed supporting file paths in 3b.

hytest-org · Jan 8, 2025 · bf690f5 · bf690f5
1 parent 60fdc0a
commit bf690f5
Showing 1 changed file with 100 additions and 76 deletions.
diff --git a/3b_Streamflow_Output_Visualization.ipynb b/3b_Streamflow_Output_Visualization.ipynb
@@ -40,12 +40,12 @@
     "\n",
     "import hydroeval as he\n",
     "\n",
-    "#import hyswap\n",
-    "#from hyswap.percentiles import calculate_variable_percentile_thresholds_by_day\n",
-    "#from hyswap.cumulative import calculate_daily_cumulative_values\n",
+    "# import hyswap\n",
+    "# from hyswap.percentiles import calculate_variable_percentile_thresholds_by_day\n",
+    "# from hyswap.cumulative import calculate_daily_cumulative_values\n",
     "import calendar\n",
     "import statistics\n",
-    "from sklearn.metrics import r2_score\n"
+    "from sklearn.metrics import r2_score"
    ]
   },
   {
@@ -76,7 +76,7 @@
     "    \"\"\"\n",
     "\n",
     "    # taken from Bandit bandit_helpers.py\n",
-    "    \n",
+    "\n",
     "    # Create the upstream graph\n",
     "    dag_us = dag_ds.reverse()\n",
     "\n",
@@ -89,9 +89,11 @@
     "                # Also remove the cutoff segment itself\n",
     "                dag_us.remove_node(xx)\n",
     "            except KeyError:\n",
-    "                print(f'WARNING: nhm_segment {xx} does not exist in stream network')\n",
+    "                print(f\"WARNING: nhm_segment {xx} does not exist in stream network\")\n",
     "    except TypeError:\n",
-    "        print('\\nSelected cutoffs should at least be an empty list instead of NoneType.')\n",
+    "        print(\n",
+    "            \"\\nSelected cutoffs should at least be an empty list instead of NoneType.\"\n",
+    "        )\n",
     "\n",
     "    # =======================================\n",
     "    # Given a d/s segment (dsmost_seg) create a subset of u/s segments\n",
@@ -102,9 +104,11 @@
     "        for xx in dsmost_seg:\n",
     "            try:\n",
     "                pred = nx.dfs_predecessors(dag_us, xx)\n",
-    "                uniq_seg_us = uniq_seg_us.union(set(pred.keys()).union(set(pred.values())))\n",
+    "                uniq_seg_us = uniq_seg_us.union(\n",
+    "                    set(pred.keys()).union(set(pred.values()))\n",
+    "                )\n",
     "            except KeyError:\n",
-    "                print(f'KeyError: Segment {xx} does not exist in stream network')\n",
+    "                print(f\"KeyError: Segment {xx} does not exist in stream network\")\n",
     "\n",
     "        # Get a subgraph in the dag_ds graph and return the edges\n",
     "        dag_ds_subset = dag_ds.subgraph(uniq_seg_us).copy()\n",
@@ -115,11 +119,13 @@
     "        # Add the downstream segments that exit the subgraph\n",
     "        for xx in true_outlets:\n",
     "            nhm_outlet = list(dag_ds.neighbors(xx))[0]\n",
-    "            dag_ds_subset.add_node(nhm_outlet, style='filled', fontcolor='white', fillcolor='grey')\n",
+    "            dag_ds_subset.add_node(\n",
+    "                nhm_outlet, style=\"filled\", fontcolor=\"white\", fillcolor=\"grey\"\n",
+    "            )\n",
     "            dag_ds_subset.add_edge(xx, nhm_outlet)\n",
-    "            dag_ds_subset.nodes[xx]['style'] = 'filled'\n",
-    "            dag_ds_subset.nodes[xx]['fontcolor'] = 'white'\n",
-    "            dag_ds_subset.nodes[xx]['fillcolor'] = 'blue'\n",
+    "            dag_ds_subset.nodes[xx][\"style\"] = \"filled\"\n",
+    "            dag_ds_subset.nodes[xx][\"fontcolor\"] = \"white\"\n",
+    "            dag_ds_subset.nodes[xx][\"fillcolor\"] = \"blue\"\n",
     "    else:\n",
     "        # No outlets specified so pull the full model\n",
     "        dag_ds_subset = dag_ds\n",
@@ -159,7 +165,7 @@
     "                for yy in seg_to_hru[xx]:\n",
     "                    final_hru_list.append(yy)\n",
     "            except KeyError:\n",
-    "                #print(f'Segment {xx} has no HRUs connected to it') # comment this out and add pass to not print the KeyError\n",
+    "                # print(f'Segment {xx} has no HRUs connected to it') # comment this out and add pass to not print the KeyError\n",
     "                pass\n",
     "        final_hru_list.sort()\n",
     "\n",
@@ -175,7 +181,7 @@
     "        poi = list(poi)\n",
     "\n",
     "    poi_hrus = {}\n",
-    "    nhm_seg = pdb.get('nhm_seg').data\n",
+    "    nhm_seg = pdb.get(\"nhm_seg\").data\n",
     "    pois_dict = pdb.poi_to_seg\n",
     "    seg_to_hru = pdb.seg_to_hru\n",
     "\n",
@@ -201,7 +207,7 @@
     "                    final_hru_list.append(yy)\n",
     "            except KeyError:\n",
     "                # Not all segments have HRUs connected to them\n",
-    "                #print(f'{cpoi}: Segment {xx} has no HRUs connected to it')\n",
+    "                # print(f'{cpoi}: Segment {xx} has no HRUs connected to it')\n",
     "                pass\n",
     "        final_hru_list.sort()\n",
     "        poi_hrus[cpoi] = final_hru_list\n",
@@ -216,36 +222,34 @@
    "outputs": [],
    "source": [
     "def stats_table(stats_df):\n",
-    "    \n",
+    "\n",
     "    evaluations = stats_df.discharge\n",
     "    std_evaluations = statistics.stdev(evaluations)\n",
-    "    \n",
+    "\n",
     "    simulations = stats_df.seg_outflow\n",
-    "    \n",
-    "    \n",
+    "\n",
     "    rmse = np.round(he.evaluator(he.rmse, simulations, evaluations), 2)\n",
     "    nse = np.round(he.evaluator(he.nse, simulations, evaluations), 2)\n",
     "    pbias = np.round(he.evaluator(he.pbias, simulations, evaluations), 2)\n",
     "    kge, r, alpha, beta = np.round(he.evaluator(he.kge, simulations, evaluations), 2)\n",
-    "    \n",
-    "    rsr = np.round(rmse/std_evaluations, 2)\n",
+    "\n",
+    "    rsr = np.round(rmse / std_evaluations, 2)\n",
     "    r_sq = np.round(np.array([r2_score(simulations, evaluations)]), 2)\n",
     "\n",
     "    stat_dict = {\n",
-    "        'KGE': kge[0],\n",
-    "        'NSE': nse[0],\n",
-    "        'Pbias': pbias[0], \n",
-    "        'RMSE': rmse[0], \n",
-    "        'R^2': r_sq[0], \n",
-    "        'R': r[0], \n",
-    "        'Alpha': alpha[0], \n",
-    "        'Beta': beta[0],\n",
-    "        'RSR': rsr[0],\n",
-    "            }\n",
-    "\n",
-    "    df = pd.DataFrame(stat_dict, index = [0])\n",
+    "        \"KGE\": kge[0],\n",
+    "        \"NSE\": nse[0],\n",
+    "        \"Pbias\": pbias[0],\n",
+    "        \"RMSE\": rmse[0],\n",
+    "        \"R^2\": r_sq[0],\n",
+    "        \"R\": r[0],\n",
+    "        \"Alpha\": alpha[0],\n",
+    "        \"Beta\": beta[0],\n",
+    "        \"RSR\": rsr[0],\n",
+    "    }\n",
+    "\n",
+    "    df = pd.DataFrame(stat_dict, index=[0])\n",
     "\n",
-    "    \n",
     "    return df"
    ]
   },
@@ -264,8 +268,10 @@
    "outputs": [],
    "source": [
     "#### READ table (.csv) of HRU calibration level file\n",
-    "hru_cal_levels_df = pd.read_csv(f'{NHM_dir}/nhm_v11_calibration_levels.csv').fillna(0)\n",
-    "hru_cal_levels_df['hw_id'] = hru_cal_levels_df.hw_id.astype('int64')"
+    "hru_cal_levels_df = pd.read_csv(\n",
+    "    r\"data_dependencies/NHM_v1_1/nhm_v1_1_HRU_cal_levels.csv\"\n",
+    ").fillna(0)\n",
+    "hru_cal_levels_df[\"hw_id\"] = hru_cal_levels_df.hw_id.astype(\"int64\")"
    ]
   },
   {
@@ -274,18 +280,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "hru_cal_levels_df = pd.merge(hru_cal_levels_df, hru_gdf, right_on = 'nhm_id', left_on = 'nhm_id')\n",
-    "hru_cal_levels_gdf =gpd.GeoDataFrame(hru_cal_levels_df, geometry = 'geometry')# Creates a Geopandas GeoDataFrame\n",
-    "hru_cal_levels_gdf['nhm_id'] = hru_cal_levels_gdf['nhm_id'].astype(str)\n",
-    "hru_cal_levels_gdf['hw_id'] = hru_cal_levels_gdf['hw_id'].astype(str)\n",
-    "\n",
-    "print('The number of HRUs in the byHRU calibration is', hru_cal_levels_gdf[hru_cal_levels_gdf['level'] >0]['level'].count())\n",
-    "print('The number of HRUs in the byHW calibration is', hru_cal_levels_gdf[hru_cal_levels_gdf['level'] >1]['level'].count())\n",
-    "print('The number of HRUs in the byHWobs calibration is', hru_cal_levels_gdf[hru_cal_levels_gdf['level'] >2]['level'].count())\n",
+    "hru_cal_levels_df = pd.merge(\n",
+    "    hru_cal_levels_df, hru_gdf, right_on=\"nhm_id\", left_on=\"nhm_id\"\n",
+    ")\n",
+    "hru_cal_levels_gdf = gpd.GeoDataFrame(\n",
+    "    hru_cal_levels_df, geometry=\"geometry\"\n",
+    ")  # Creates a Geopandas GeoDataFrame\n",
+    "hru_cal_levels_gdf[\"nhm_id\"] = hru_cal_levels_gdf[\"nhm_id\"].astype(str)\n",
+    "hru_cal_levels_gdf[\"hw_id\"] = hru_cal_levels_gdf[\"hw_id\"].astype(str)\n",
+    "\n",
+    "print(\n",
+    "    \"The number of HRUs in the byHRU calibration is\",\n",
+    "    hru_cal_levels_gdf[hru_cal_levels_gdf[\"level\"] > 0][\"level\"].count(),\n",
+    ")\n",
+    "print(\n",
+    "    \"The number of HRUs in the byHW calibration is\",\n",
+    "    hru_cal_levels_gdf[hru_cal_levels_gdf[\"level\"] > 1][\"level\"].count(),\n",
+    ")\n",
+    "print(\n",
+    "    \"The number of HRUs in the byHWobs calibration is\",\n",
+    "    hru_cal_levels_gdf[hru_cal_levels_gdf[\"level\"] > 2][\"level\"].count(),\n",
+    ")\n",
     "\n",
     "\n",
-    "#hru_cal_levels_df #View results to verify"
+    "# hru_cal_levels_df #View results to verify"
    ]
   },
   {
@@ -301,11 +319,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "byHW_basins_gdf = hru_cal_levels_gdf.loc[hru_cal_levels_gdf['byHW'] == 1]\n",
-    "HW_basins_gdf = byHW_basins_gdf.dissolve(by='hw_id').to_crs(crs)\n",
+    "byHW_basins_gdf = hru_cal_levels_gdf.loc[hru_cal_levels_gdf[\"byHW\"] == 1]\n",
+    "HW_basins_gdf = byHW_basins_gdf.dissolve(by=\"hw_id\").to_crs(crs)\n",
     "HW_basins_gdf.reset_index(inplace=True, drop=False)\n",
     "HW_basins = HW_basins_gdf.boundary\n",
-    "#HW_basins_gdf"
+    "# HW_basins_gdf"
    ]
   },
   {
@@ -323,32 +341,38 @@
    "source": [
     "# This reads in the csv file that hase the gages used to calibrate the byHWobs part for CONUS.\n",
     "# Read in station file columns needed (You may need to tailor this to the particular file.\n",
-    "col_names = ['poi_id',\n",
-    "                 #'poi_name',\n",
-    "                 'latitude',\n",
-    "                 'longitude',\n",
-    "                 #'drainage_area',\n",
-    "                 #'drainage_area_contrib'\n",
-    "                ]\n",
-    "col_types = [np.str_,\n",
-    "                #np.str_,\n",
-    "                float,\n",
-    "                float,\n",
-    "                #float,\n",
-    "                #float\n",
-    "            ]\n",
-    "cols = dict(zip(col_names, col_types))# Creates a dictionary of column header and datatype called below.\n",
-    "\n",
-    "byHWobs_poi_df = pd.read_csv(f'{NHM_dir}/nhm_v11_hwobs_pois.csv', sep='\\t', dtype=cols).fillna(0)\n",
-    "\n",
-    "#byHWobs_poi_df = pd.read_csv(f'{NHM_dir}/nhm_v11_hwobs_pois.csv', sep='\\t').fillna(0)\n",
-    "#byHWobs_poi_df['poi_id'] = byHWobs_poi_df.poi_id.astype('str') # makes sure that this is a string, \n",
+    "col_names = [\n",
+    "    \"poi_id\",\n",
+    "    #'poi_name',\n",
+    "    \"latitude\",\n",
+    "    \"longitude\",\n",
+    "    #'drainage_area',\n",
+    "    #'drainage_area_contrib'\n",
+    "]\n",
+    "col_types = [\n",
+    "    np.str_,\n",
+    "    # np.str_,\n",
+    "    float,\n",
+    "    float,\n",
+    "    # float,\n",
+    "    # float\n",
+    "]\n",
+    "cols = dict(\n",
+    "    zip(col_names, col_types)\n",
+    ")  # Creates a dictionary of column header and datatype called below.\n",
+    "\n",
+    "byHWobs_poi_df = pd.read_csv(\n",
+    "    r\"data_dependencies/NHM_v1_1/nhm_v1_1_byhwobs_cal_gages.csv\", sep=\"\\t\", dtype=cols\n",
+    ").fillna(0)\n",
+    "\n",
+    "# byHWobs_poi_df = pd.read_csv(f'{NHM_dir}/nhm_v11_hwobs_pois.csv', sep='\\t').fillna(0)\n",
+    "# byHWobs_poi_df['poi_id'] = byHWobs_poi_df.poi_id.astype('str') # makes sure that this is a string,\n",
     "# must have the leading zeros; suggest a more formal read and set like used in prev notebook.\n",
     "\n",
     "# Identify the byHWobs calibration gages in our current poi database (ammended in the model prams file to include more gages)\n",
-    "poi_df['nhm_calib'] = \"N\"\n",
-    "poi_df.loc[poi_df['poi_id'].isin(byHWobs_poi_df['poi_id']), 'nhm_calib'] = \"Y\"\n",
-    "#poi_df.head()"
+    "poi_df[\"nhm_calib\"] = \"N\"\n",
+    "poi_df.loc[poi_df[\"poi_id\"].isin(byHWobs_poi_df[\"poi_id\"]), \"nhm_calib\"] = \"Y\"\n",
+    "# poi_df.head()"
    ]
   },
   {
@@ -374,10 +398,10 @@
    "outputs": [],
    "source": [
     "# Set WY start and stop times, and output variable needed for slicing the time series data for plotting\n",
-    "WY_start = '1979-10-01'\n",
-    "WY_end = '2021-09-30'\n",
+    "WY_start = \"1979-10-01\"\n",
+    "WY_end = \"2021-09-30\"\n",
     "# Note that the model start and stop times in the control file should be the same as the observation start and stop times.\n",
-    "output_var_sel = 'seg_outflow' # Select this output variable to read"
+    "output_var_sel = \"seg_outflow\"  # Select this output variable to read"
    ]
   },
   {