Skip to content

Commit

Permalink
Merge pull request #76 from edgi-govdata-archiving/cross-program
Browse files Browse the repository at this point in the history
new functions supporting ECHO-Cross-Program notebook
  • Loading branch information
ericnost authored Apr 17, 2024
2 parents c907a1e + 63a6bc8 commit 6b85c4f
Showing 1 changed file with 39 additions and 9 deletions.
48 changes: 39 additions & 9 deletions ECHO_modules/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,27 +268,32 @@ def show_data_set_widget( data_sets ):
return data_set_widget


def show_fac_widget( fac_series ):
def show_fac_widget( fac_series, top_violators ):
'''
Create and return a dropdown list of facilities from the
input Series
input Series. Pre-select the facilities identified in
top_violators.
Parameters
----------
fac_series : Series
The facilities to be shown. It may have duplicates.
top_violators : Dataframe
The top violators in the region.
Returns
-------
widget
The widget with facility names
'''

selected = list(set(fac_series) & set(top_violators))
fac_list = fac_series.dropna().unique()
fac_list.sort()
style = {'description_width': 'initial'}
widget=widgets.SelectMultiple(
options=fac_list,
value=selected,
style=style,
layout=Layout(width='70%'),
description='Facility Name:',
Expand Down Expand Up @@ -795,7 +800,7 @@ def write_dataset( df, base, type, state, regions ):
The region type of the data
state: str
The state, or None
regions: list
regions: tuple
The region identifiers, e.g. CD number, County, State, Zip code
'''
if ( df is not None and len( df ) > 0 ):
Expand All @@ -807,12 +812,9 @@ def write_dataset( df, base, type, state, regions ):
filename += '-' + type

if ( regions is not None ):
regions = ''.join(regions.split())
regions = ",".join(map(lambda x: "\'" + str(x) + "\'", regions.split(',')))
filename += str(regions)
for region in regions:
filename += '-' + str(region)
filename = filename.replace('\'', '').replace(',', '-')
# for region in regions:
# filename += '-' + str(region)
filename = urllib.parse.quote_plus(filename, safe='/')
filename += '.csv'
df.to_csv( filename )
Expand Down Expand Up @@ -912,6 +914,34 @@ def get_top_violators( df_active, flag, noncomp_field, action_field, num_fac=10
df_active = df_active.head( num_fac )
return df_active


def get_tri_ghg_violators(df_active, field, num_violators):
    '''
    Rank facilities on one numeric column and keep the largest.

    Parameters
    ----------
    df_active : Dataframe
        Facility records. Must contain FAC_NAME, DFR_URL, FAC_LAT,
        FAC_LONG and the column named by field.
    field : str
        Name of the column to filter and rank on.
    num_violators : int
        Maximum number of rows to return.

    Returns
    -------
    Dataframe
        A new Dataframe holding only the rows whose field value is
        greater than zero, reduced to FAC_NAME, field, DFR_URL,
        FAC_LAT and FAC_LONG, ordered from largest to smallest field
        value and cut to the first num_violators rows. The input
        Dataframe is not modified.
    '''
    wanted_columns = ['FAC_NAME', field, 'DFR_URL', 'FAC_LAT', 'FAC_LONG']
    is_positive = df_active[field] > 0
    # Row filter and column selection in one step; .loc with a boolean
    # mask yields a new object, so df_active stays untouched.
    candidates = df_active.loc[is_positive, wanted_columns]
    return candidates.sort_values(by=[field], ascending=False).head(num_violators)

# TODO: add get_sdwa_violators(df_active, num_violators).
# Open question: should it rank on SDWA_FORMAL_ACTION_COUNT?


def chart_tri_ghg_violators(df, field, title, xlabel):
    '''
    Draw a horizontal bar chart of one column of df, one bar per row.

    Parameters
    ----------
    df : Dataframe
        The rows to chart. Must contain FAC_NAME and the column named
        by field.
    field : str
        Name of the column that supplies the bar lengths.
    title : str
        Title placed on the chart.
    xlabel : str
        Label for the x axis.

    Returns
    -------
    None
        The chart is drawn on a new 10 x 10 figure. A TypeError raised
        while plotting or labelling is printed instead of propagated.
    '''
    row_ids = df.index
    bar_lengths = df[field]
    sns.set(style='whitegrid')
    _, axes = plt.subplots(figsize=(10, 10))
    try:
        chart = sns.barplot(
            x=bar_lengths,
            y=row_ids,
            order=list(row_ids),
            orient='h',
            color=colour,  # not defined in this function; bound elsewhere
        )
        chart.set_title(title)
        axes.set_xlabel(xlabel)
        axes.set_ylabel('Facility')
        # Bars are keyed by index value; label the ticks with the
        # facility names instead.
        axes.set_yticklabels(df['FAC_NAME'])
    except TypeError as err:
        print("TypeError: {}".format(str(err)))


def chart_top_violators( ranked, state, selections, epa_pgm ):
'''
Draw a horizontal bar chart of the top non-compliant facilities.
Expand Down

0 comments on commit 6b85c4f

Please sign in to comment.