From 63a6bc8d5d6619ab4b7b481babc87bdc88b9c991 Mon Sep 17 00:00:00 2001
From: shansen5 <steven.e.hansen@gmail.com>
Date: Sat, 13 Apr 2024 20:39:59 -0700
Subject: [PATCH] new functions supporting ECHO-Cross-Program notebook

---
 ECHO_modules/utilities.py | 48 +++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/ECHO_modules/utilities.py b/ECHO_modules/utilities.py
index cfad993..438a37c 100644
--- a/ECHO_modules/utilities.py
+++ b/ECHO_modules/utilities.py
@@ -268,27 +268,32 @@ def show_data_set_widget( data_sets ):
     return data_set_widget
 
 
-def show_fac_widget( fac_series ):
+def show_fac_widget( fac_series, top_violators ):
     '''
     Create and return a dropdown list of facilities from the 
-    input Series
+    input Series. Pre-select the facilities identified in
+    top_violators.
 
     Parameters
     ----------
     fac_series : Series
         The facilities to be shown.  It may have duplicates.
 
+    top_violators : Dataframe
+        The top violators in the region.
+
     Returns
     -------
     widget
         The widget with facility names
     '''
-
+    selected = list(set(fac_series) & set(top_violators))
     fac_list = fac_series.dropna().unique()
     fac_list.sort()
     style = {'description_width': 'initial'}
     widget=widgets.SelectMultiple(
         options=fac_list,
+        value=selected,
         style=style,
         layout=Layout(width='70%'),
         description='Facility Name:',
@@ -795,7 +800,7 @@ def write_dataset( df, base, type, state, regions ):
         The region type of the data
     state: str
         The state, or None
-    regions: list
+    regions: tuple
         The region identifiers, e.g. CD number, County, State, Zip code
     '''
     if ( df is not None and len( df ) > 0 ):
@@ -807,12 +812,9 @@ def write_dataset( df, base, type, state, regions ):
         filename += '-' + type
 
         if ( regions is not None ):
-            regions = ''.join(regions.split())
-            regions = ",".join(map(lambda x: "\'" + str(x) + "\'", regions.split(',')))
-        filename += str(regions)
+            for region in regions:
+                filename += '-' + str(region)
         filename = filename.replace('\'', '').replace(',', '-')
-#            for region in regions:
-#                filename += '-' + str(region)
         filename = urllib.parse.quote_plus(filename, safe='/')
         filename += '.csv'
         df.to_csv( filename ) 
@@ -912,6 +914,34 @@ def get_top_violators( df_active, flag, noncomp_field, action_field, num_fac=10
     df_active = df_active.head( num_fac )
     return df_active   
 
+
+def get_tri_ghg_violators(df_active, field, num_violators):
+    '''Return the num_violators rows of df_active with the largest positive
+    field values, keeping only the name, field, URL and lat/long columns.'''
+    keep = ['FAC_NAME', field, 'DFR_URL', 'FAC_LAT', 'FAC_LONG']
+    positive = df_active.loc[df_active[field] > 0][keep]
+    ranked = positive.sort_values(by=[field], ascending=False)
+    return ranked.head(num_violators)
+
+# TODO: add get_sdwa_violators(df_active, num_violators).
+# Open question: rank by SDWA_FORMAL_ACTION_COUNT?
+
+
+def chart_tri_ghg_violators(df, field, title, xlabel):
+    '''Draw a horizontal bar chart of df[field] per row, labeled by FAC_NAME.'''
+    rows, amounts = df.index, df[field]
+    sns.set(style='whitegrid')
+    fig, ax = plt.subplots(figsize=(10, 10))
+    try:
+        bars = sns.barplot(x=amounts, y=rows, order=list(rows), orient='h', color=colour)
+        bars.set_title(title)
+        ax.set_xlabel(xlabel)
+        ax.set_ylabel('Facility')
+        ax.set_yticklabels(df['FAC_NAME'])
+    except TypeError as te:
+        print("TypeError: {}".format(str(te)))
+
+
 def chart_top_violators( ranked, state, selections, epa_pgm ):
     '''
     Draw a horizontal bar chart of the top non-compliant facilities.