diff --git a/class.py b/class.py index 2d041c6..b5db0c8 100644 --- a/class.py +++ b/class.py @@ -1,14 +1,19 @@ import ECHO_modules.utilities as utilities import ECHO_modules.presets as presets -import geopandas as geopandas -import topojson as tp -import rtree import json +import geopandas as geopandas +import pygeos +import ipyleaflet import folium from folium.plugins import FastMarkerCluster import urllib import pandas as pd import requests +import zipfile +import io +import seaborn as sns +import matplotlib.pyplot as plt +from IPython.core.display import display, HTML class Echo: def __init__( self, units, unit_type, programs=[], intersection=False, intersecting_geo=None): @@ -821,3 +826,92 @@ def batch(p, id_string, program_data): program_data['Date'] = program_data['EARLIEST_FRV_DETERM_DATE'].fillna(program_data['HPV_DAYZERO_DATE']) return program_data + +class EJScreen: + """ + Class for creating EJScreen analysis objects around a location (lat/lng) + EJScreen objects host a variety of methods for collecting, analyzing, and display EJScreen data + Currently hard-coded for New Jersey + """ + def __init__( self , location=None): + import ipywidgets as widgets + + # Load and join census data + self.census_data = utilities.add_spatial_data(url="https://www2.census.gov/geo/tiger/TIGER2017/BG/tl_2017_34_bg.zip", name="census", projection=26918) # NJ specific + self.ej_data = pd.read_csv("https://github.com/edgi-govdata-archiving/ECHO-SDWA/raw/main/EJSCREEN_2021_StateRankings_NJ.csv") # NJ specific + self.ej_data["ID"] = self.ej_data["ID"].astype(str) + self.census_data.set_index("GEOID", inplace=True) + self.ej_data.set_index("ID", inplace=True) + self.census_data = self.census_data.join(self.ej_data) + + # EJ variable picking parameters + self.pick_ejvar = None + self.picker = widgets.Output() + self.options = ["LOWINCPCT", "MINORPCT", "OVER64PCT", "CANCER"] # list of EJScreen variables that will be selected + display(self.picker) + self.out = widgets.Output() + display(self.out) + + self.location = location # Should be a single shapely geometry (point or polygon) + + def show_pick_variable (self): + import ipywidgets as widgets + self.pick_ejvar = widgets.Dropdown( + options=self.options, + description='EJ Variable:' + ) + with self.picker: + display(self.pick_ejvar) + display(HTML("

see also for details on each variable: Metadata")) + self.pick_ejvar.observe(self.make_map) + + # map + def make_map (self, change): + if self.location is not None: + if change['type'] == 'change' and change['name'] == 'value': + import branca + from ipyleaflet import Map, basemaps, basemap_to_tiles, GeoJSON, LayersControl + import json + + # get EJ variable + ejvar = self.pick_ejvar.value + + # filter to area + bgs = self.census_data[ self.census_data.geometry.intersects(self.location.buffer(10000)[0]) ] #block groups in the area around the clicked point + + # set colorscale + colorscale = branca.colormap.linear.YlOrRd_09.scale(bgs[ejvar].min(), bgs[ejvar].max()) + + # set layers and style + def style_function(feature): + # choropleth approach + return { + "fillOpacity": .5, + "weight": .5, + "fillColor": "#d3d3d3" if feature["properties"][ejvar] is None else colorscale(feature["properties"][ejvar]), + } + + # Create the map + m = Map( + basemap=basemap_to_tiles(basemaps.CartoDB.Positron), + ) + + # Load the layer + bgs = bgs.to_crs(4326) # transformation to geographic coordinate system required + geo_json = GeoJSON( + data = json.loads(bgs.to_json()), + style_callback = style_function + ) + m.add_layer(geo_json) + + # fits the map to the polygon layer + bounds = bgs.total_bounds + bounds = [[bounds[1], bounds[0]], [bounds[3], bounds[2]]] + m.fit_bounds(bounds) + m.zoom = 13 + + m.add_control(LayersControl()) # add a control for toggling layers on/off + + with self.out: + self.out.clear_output() + display(m) \ No newline at end of file diff --git a/utilities.py b/utilities.py index d71bceb..c2d7fc6 100644 --- a/utilities.py +++ b/utilities.py @@ -4,16 +4,15 @@ ''' # Import libraries - import os import csv import datetime import pandas as pd import numpy as np -import ipywidgets as widgets import urllib +import ipywidgets as widgets from ipywidgets import interact, interactive, fixed, interact_manual, Layout -from IPython.display import display +from IPython.core.display import display, HTML def get_data( sql, index_field=None ): ''' @@ -48,6 +47,313 @@ def get_data( sql, index_field=None ): # print( "get_data() returning {} records.".format( len(ds) )) return ds +def place_picker(): + ''' + Allow users to pick a point on an ipyleaflet map and store those coordinates in `marker` + + Parameters + ---------- + None + + Returns + ---------- + Displays an ipyleaflet map with a moveable marker + Returns the marker, whose current coordinates can be accessed with marker.location + ''' + from ipyleaflet import Map, Marker, basemaps, basemap_to_tiles + + m = Map( + basemap=basemap_to_tiles(basemaps.CartoDB.Positron), + center=(40, -74), + zoom=7 + ) # Default to NJ + + marker = Marker(location=(40, -74), draggable=True) # defaults to New Jersey for SDWA project + m.add_layer(marker); + + display(m) + + return marker + +def add_spatial_data(url, name, projection=4326): + """ + Gets external geospatial data + + Parameters + ---------- + url: a zip of shapefile (in the future, extend to geojson) + name: a string handle for the data files + projection (optional): an EPSG projection for the spatial dataa + + Returns + ------- + sd: spatial data reads ]as a geodataframe and projected to a specified projected coordinate system, or defaults to GCS + + """ + import requests, zipfile, io, geopandas + + r = requests.get(url) + z = zipfile.ZipFile(io.BytesIO(r.content)) + z.extractall("/content/"+name) + sd = geopandas.read_file("/content/"+name+"/") + sd.to_crs(crs=projection, inplace=True) # transform to input projection, defaults to WGS GCS + return sd + +def place_by_point(places, point): + """ + get the place that contains a point + + Parameters + ---------- + place: a geopandas geodataframe of polygons + point: a geopandas geoseries (in the future, extend to gdf) + + Returns + ------- + Filtered geodataframe + """ + + # Try to find overlap. If none, report it + place = places.loc[places.geometry.contains(point[0])] + if place.empty: + print("Looks like your point doesn't fall within any of the places") + else: + return place # Theoretically, for some data, multiple places could contain a point (extend) + +def show_map(layers, style): + """ + maps layers according to styles + + Parameters + ---------- + layers: a dict like {"places": places} where places is geojson, ordered in the z order you want + styles: a dict like {"places": {"fill": none}} with other leaflet style properties + + Returns + ---------- + Displays an ipyleaflet map + + """ + from ipyleaflet import Map, basemaps, basemap_to_tiles, GeoData, LayersControl + + m = Map( + basemap=basemap_to_tiles(basemaps.CartoDB.Positron), + ) + + for name, layer in layers.items(): + layer = layer.to_crs(4326) # transformation to geographic coordinate system required for mapping + l = GeoData( + geo_dataframe = layer, + name = name, + style = style[name] + ) + m.add_layer(l) + + # hacky - iteratively fits the map frame to the bounds/extent of the layers + bounds = layer.total_bounds + bounds = [[bounds[1], bounds[0]], [bounds[3], bounds[2]]] + m.fit_bounds(bounds) + + m.zoom = 13 + + m.add_control(LayersControl()) # adds a toggle for the layers + + display(m) + +def get_data_from_ids(table, key, input): + """ + Gets ECHO data from table based on matching ids + + Parameters + ---------- + input: dataframe to get ids out of + table: str, ECHO table of interest + key: str, the field to match ids on + currently only works where column names are same between input and table (extend) + + Returns + ---------- + dataframe from SBU ECHO database copy + + """ + from ECHO_modules.utilities import get_data + + # process ids + ids = "" + for id in list(input[key].unique()): + ids += "'"+id +"'," + ids = ids[:-1] + ids + + # get data + sql = 'select * from "'+table+'" where "'+key+'" in ({})'.format(ids) + data = get_data(sql) + + return data + +def chart_top_violators(ranked, values, size, labels): + ''' + rank and chart entities based on a provided dataframe + + Parameters + ---------- + ranked: dataframe sorted by the variable of interest (# of violations) + values: str, name of variable (column) of interest + size: int, how many of the top rows/entities to rank + labels: dict, labels for the chart e.g. {'title': 'title', 'x': 'x-axis', 'y':'y-axis'} + + Returns + ---------- + Matplotlib chart + + ''' + import seaborn as sns + import matplotlib.pyplot as plt + + if ranked is None: + print( 'There is no data to graph.') + return None + + # Process data + ranked = ranked.head(size) + unit = ranked.index + values = ranked[values] + + # Create chart + sns.set(style='whitegrid') + fig, ax = plt.subplots(figsize=(10,10)) + try: + g = sns.barplot(x=values, y=unit, order=list(unit), orient="h", color = "red") + g.set_title(labels["title"]) + ax.set_xlabel(labels["x"]) + ax.set_ylabel(labels["y"]) + ax.set_yticklabels(unit) + return ( g ) + except TypeError as te: + print( "TypeError: {}".format( str(te) )) + return None + +def bivariate_map(points, point_attribute, polygons, polygon_attribute): + """ + Creates a bivariate map consisting of a point and a polygon layer symbolized according to specified attributes + Parameters + ---------- + points: geodataframe of point features + point_attribute: str, indicating the field in the `points` to symbolize + polygons: geodataframe of polygon features + polygon_attribute: str, indicating the field in the `polygons` to symbolize + """ + import branca + from ipyleaflet import Map, basemaps, basemap_to_tiles, GeoJSON, LayersControl, LayerGroup, Circle + import json + + # set colorscale + colorscale = branca.colormap.linear.YlOrRd_09.scale(polygons[polygon_attribute].min(), polygons[polygon_attribute].max()) + + # set layers and style + def style_function(feature): + """ + Assumes a geojson as input feature + """ + return { + "fillOpacity": .5, + "weight": .1, + "fillColor": "#d3d3d3" if feature["properties"][polygon_attribute] is None else colorscale(feature["properties"][polygon_attribute]), + } + + # Create map + m = Map( + basemap=basemap_to_tiles(basemaps.CartoDB.Positron), + center=(40,-74), + zoom = 7 + ) + + # Create polygon layer + polygons = polygons.to_crs(4326) # transformation to geographic coordinate system required + geo_json = GeoJSON( + data = json.loads(polygons.to_json()), # load as geojson + style_callback = style_function + ) + m.add_layer(geo_json) + + + # Create points layer + circles = [] + points = points.loc[points[point_attribute] > 0] # remove NaNs :( + points = json.loads(points.to_json()) # convert to geojson + for row in points["features"]: + try: + circle = Circle( + location = (row["properties"]["FAC_LAT"], row["properties"]["FAC_LONG"]), #Need to un-hard code this. Should be able to use geometry. + title = str(row["properties"][point_attribute]), + radius = int(row["properties"][point_attribute]) * 2, + color = "black", + weight = 1, + fill_color = "black", + fill_opacity= 1 + ) + circles.append(circle) + except: + pass + layer_group = LayerGroup(layers=(circles)) + m.add_layer(layer_group) + + # fits the map to the polygon layer + bounds = polygons.total_bounds + bounds = [[bounds[1], bounds[0]], [bounds[3], bounds[2]]] + m.fit_bounds(bounds) + + m.add_control(LayersControl()) # add control to toggle layers on/off + + display(m) + +def choropleth(polygons, polygon_attribute): + ''' + creates choropleth map - shades polygons by attribute + + Parameters + ---------- + polygons: geodataframe of polygons to be mapped + polygon_attribute: str, name of field in `polygons` geodataframe to symbolize + + Returns + ---------- + Displays a map + + ''' + import branca + import json + from ipyleaflet import Map, basemaps, basemap_to_tiles, Choropleth, LayersControl + + m = Map( + basemap=basemap_to_tiles(basemaps.CartoDB.Positron), + center=(40,-74), + zoom = 7 + ) + + # split data into geo and choro data for mapping + polygons = polygons.to_crs(4326) # requires transformation to geographic coordinate system + geo_data = json.loads(polygons[["geometry"]].to_json()) # convert to geojson + choro_data = polygons[[polygon_attribute]] # the attribute data + choro_data = json.loads(choro_data.to_json()) # convert to geojson + + # Create layer + layer = Choropleth( + geo_data=geo_data, + choro_data=choro_data[polygon_attribute], + colormap=branca.colormap.linear.YlOrRd_09, + border_color='black', + style={'fillOpacity': 0.5, 'weight': .1}, + key_on = "id" #leaflet default + ) + m.add_layer(layer) + + # fits the map to the layer + bounds = polygons.total_bounds + bounds = [[bounds[1], bounds[0]], [bounds[3], bounds[2]]] + m.fit_bounds(bounds) + + display(m) def fix_county_names( in_counties ): '''