Skip to content

Commit

Permalink
Improve performance of geoindexing
Browse files Browse the repository at this point in the history
  • Loading branch information
johnisom committed Dec 13, 2023
1 parent 4fc361d commit f30f192
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 14 deletions.
1 change: 0 additions & 1 deletion src/fires_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

_fires_mutex = Lock()
_fires_df = None

def get_fires_dataframe():
global _fires_df
global _fires_mutex
Expand Down
7 changes: 4 additions & 3 deletions src/location_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

_counties_mutex = Lock()
_counties_geodf = None

def get_counties_geodf():
global _counties_geodf
global _counties_mutex
Expand All @@ -38,7 +37,6 @@ def get_counties_geodf():

_fips_codes_mutex = Lock()
_fips_codes_df = None

def get_fips_codes_dataframe():
global _fips_codes_df
global _fips_codes_mutex
Expand All @@ -65,4 +63,7 @@ def get_state_fips_codes(keys):

# Report whether the point (lon, lat) falls within at least one geometry of the
# GeoDataFrame returned by get_counties_geodf(); returns a bool.
# NOTE(review): this text is a flattened diff hunk (header "-65,4 +63,7"), so
# indentation is lost and the removed line sits directly above the added lines.
# Read literally as one function, the first `return` would make everything
# after it unreachable; in the real file only one of the two variants exists.
def are_coordinates_inside_usa(lon, lat):
counties_geodf = get_counties_geodf()
# Pre-commit variant (the removed line): applies covers() to every geometry
# through a Python-level map() and checks whether any row survives the filter.
return len(counties_geodf.geometry.loc[counties_geodf.geometry.map(lambda geom: geom.covers(Point(lon, lat)))]) > 0
# Post-commit variant (the four added lines): query the spatial index first to
# get candidate row positions (presumably bounding-box hits; confirm against
# the geopandas sindex documentation).
possible_matches_indices = counties_geodf.sindex.intersection((lon, lat))
# Positional lookup of the candidate rows.
possible_matches = counties_geodf.iloc[possible_matches_indices]
# Exact geometric test, run only on the candidates rather than on every row.
exact_matches = possible_matches[possible_matches.intersects(Point(lon, lat))]
# True when at least one candidate geometry really intersects the point.
return len(exact_matches) > 0
16 changes: 6 additions & 10 deletions src/oneshot/backfill_fips_codes.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import sqlite3
from rtree.index import Index
from functools import reduce
from shapely.geometry import Point
from ..location_info import get_counties_geodf

DB_FILENAME = 'db/fires.sqlite'

def backfill_location_information(dryrun=False):
print('loading counties')
counties = get_counties_geodf()
geoidx = counties.sindex
print('connecting to db')
con = sqlite3.connect(DB_FILENAME)
cur = con.cursor()
Expand All @@ -29,18 +29,14 @@ def backfill_location_information(dryrun=False):
for state_numeric_code, county_numeric_code, state_name, county_name in cur.execute('SELECT state_numeric_code, county_numeric_code, state_name, county_name FROM fips_codes'):
fips_codes_to_state_county_name[state_numeric_code + county_numeric_code] = (state_name, county_name)

# build index to do querying
print('building county shape index')
geoidx = Index()
for pid, poly in enumerate(counties['geometry'].values):
geoidx.insert(pid, poly.bounds)

# map the coordinates to the respective counties index
print('mapping longitude and latitude to county')
for elem in fires_coordinate_county_indices:
lon, lat = elem[1]
bounds = (lon, lat, lon, lat)
county_idx = next(geoidx.nearest(bounds)) # alt: list(geoindex.nearest(bounds))[0]
possible_matches_indices = geoidx.intersection((lon, lat))
possible_matches = counties.iloc[possible_matches_indices]
precise_matches = possible_matches[possible_matches.intersects(Point(lon, lat))]
county_idx = precise_matches.iloc[0].name
elem[2] = county_idx

# now we can update the county fips codes to the database
Expand Down

0 comments on commit f30f192

Please sign in to comment.