Skip to content

Commit

Permalink
Merge pull request #11 from Grist-Data-Desk/feat/add-clipping-filteri…
Browse files Browse the repository at this point in the history
…ng-column-selection

feat: Integrate code to clip parcels to reservation boundaries, filter columns by acreage and additional criteria in METHODOLOGY.md, and concatenate activity_info and activity_info_2 columns.
  • Loading branch information
clayton-aldern authored Aug 6, 2024
2 parents 2c8c334 + 3ae0b79 commit 428c50c
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 4 deletions.
25 changes: 24 additions & 1 deletion stlor/activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ACTIVITY_REWRITE_RULES,
ACTIVITY_RIGHTS_TYPE,
)
from stlor.constants import STATE, RIGHTS_TYPE, ACTIVITY
from stlor.constants import STATE, RIGHTS_TYPE, ACTIVITY, ACTIVITY_INFO, ACTIVITY_INFO_2
from stlor.entities import StateActivityDataSource, RightsType


Expand Down Expand Up @@ -229,3 +229,26 @@ def capture_lessee_and_lease_type(
activity_info["lease_status"] = activity_row[key]

return fmt_single_activity_info(activity_info)


def concatenate_activity_info(row: dict) -> str:
    """Combine a parcel's activity_info and activity_info_2 values.

    Each value is stripped of surrounding whitespace. Non-empty values are
    joined with a single newline; if only one is non-empty it is returned on
    its own, and if neither is, the empty string is returned.

    Values that are not strings (for example None or a float NaN standing in
    for a missing cell) are treated as empty instead of raising
    AttributeError on ``.strip()``.

    Arguments:
    row -- a single parcel record, represented as a dictionary

    Returns:
    str -- the concatenated activity information string
    """
    parts = []
    for column in (ACTIVITY_INFO, ACTIVITY_INFO_2):
        value = row[column]
        # Missing cells are not strings and cannot be stripped; treat them as
        # empty so one blank cell does not abort the whole run.
        text = value.strip() if isinstance(value, str) else ""
        if text:
            parts.append(text)

    # Joining zero, one or two parts reproduces the "", single-value and
    # newline-joined outcomes respectively.
    return "\n".join(parts)
72 changes: 72 additions & 0 deletions stlor/clip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import logging
from pathlib import Path

import geopandas as gpd

from stlor.constants import (
NAD_83_CONUS_ALBERS,
SQUARE_METERS_PER_ACRE,
CLIPPED_ACRES,
STATE,
RESERVATION_NAME,
)

# Configure the root logger for INFO-level output as soon as this module is
# imported, then create a logger named after this module for the progress
# messages emitted during clipping.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def clip_to_reservation_boundaries(stl_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Clip state trust lands to reservation boundaries.

    Reads the primary and supplemental reservation layers from disk (paths
    are relative to the current working directory), reprojects every layer to
    NAD83 Conus Albers, unions the two reservation layers, clips the parcels
    to that union, and stores each clipped parcel's area, converted to acres
    and rounded to two decimals, in the clipped-acres column.

    Arguments:
    stl_gdf -- the state trust lands GeoDataFrame

    Returns:
    gpd.GeoDataFrame -- the clipped state trust lands GeoDataFrame
    """
    primary_path = Path("public_data/00_Reservation Layer/BIA_AIAN+OK_Fixed.geojson")
    supplemental_path = Path("public_data/00_Reservation Layer/BIA-supp.geojson")

    primary = gpd.read_file(primary_path.resolve())
    supplemental = gpd.read_file(supplemental_path.resolve())

    # All layers must share one CRS before they can be overlaid and clipped.
    logger.info("Reprojecting STL and BIA_AIAN GeoDataFrames to NAD83 Conus Albers.")
    parcels = stl_gdf.to_crs(NAD_83_CONUS_ALBERS)
    primary = primary.to_crs(NAD_83_CONUS_ALBERS)
    supplemental = supplemental.to_crs(NAD_83_CONUS_ALBERS)

    # Merge the primary and supplemental reservation layers into a single
    # layer in preparation for the clipping operation.
    logger.info("Unioning the BIA_AIAN primary and supplemental GeoDataFrames.")
    boundaries = primary.overlay(supplemental, how="union")

    logger.info("Clipping the STL GeoDataFrame to reservation boundaries.")
    clipped = parcels.clip(boundaries)

    logger.info("Calculating the area of the clipped state trust lands.")
    acres = clipped.area / SQUARE_METERS_PER_ACRE
    clipped[CLIPPED_ACRES] = acres.round(2)

    return clipped


def filter_parcels_by_acreage(stl_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Remove parcels that are too small or are WY parcels on the Crow reservation.

    A parcel is dropped when either of the following holds:
    1. Its clipped acreage is below 10.0
    - Note that this includes parcels whose clipped acreage is 0.0.
    2. Its state is "WY" and its reservation name is "Crow"

    Arguments:
    stl_gdf -- the state trust lands GeoDataFrame

    Returns:
    gpd.GeoDataFrame -- the rows of stl_gdf that survive both exclusions
    """
    large_enough = stl_gdf[CLIPPED_ACRES] >= 10.0
    in_wyoming = stl_gdf[STATE] == "WY"
    on_crow = stl_gdf[RESERVATION_NAME] == "Crow"

    # Keep a row only if it meets the acreage floor and is not a WY/Crow parcel.
    keep = large_enough & ~(in_wyoming & on_crow)

    return stl_gdf[keep]
37 changes: 36 additions & 1 deletion stlor/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,41 @@
# Column names.
# Column names
# These are the names of columns on the state trust lands table that other
# modules refer to by constant instead of by string literal.
OBJECT_ID = "object_id"
STATE = "state"
RIGHTS_TYPE = "rights_type"
ACTIVITY = "activity"
ACTIVITY_INFO = "activity_info"
# Secondary activity description; main.py merges it into ACTIVITY_INFO and
# then drops it before the final dataset is written.
ACTIVITY_INFO_2 = "activity_info_2"
GEOMETRY = "geometry"
# Parcel area after clipping to reservation boundaries, written by clip.py.
CLIPPED_ACRES = "clipped_acres"
RESERVATION_NAME = "reservation_name"

# Projections
# EPSG code for NAD83 / Conus Albers; clip.py reprojects every layer to this
# CRS before overlaying, clipping and measuring area.
NAD_83_CONUS_ALBERS = "EPSG:5070"

# Units
# Divisor used by clip.py to convert a computed area in square meters to acres.
SQUARE_METERS_PER_ACRE = 4046.8564224

# Output columns
# Columns, in order, that main.py selects for the final dataset.
# ACTIVITY_INFO_2 is intentionally absent: it has been folded into
# ACTIVITY_INFO by the time this selection is applied.
FINAL_DATASET_COLUMNS = [
OBJECT_ID,
STATE,
"managing_agency",
"state_enabling_act",
"trust_name",
RESERVATION_NAME,
RIGHTS_TYPE,
"rights_type_info",
"acres",
"gis_acres",
"net_acres",
CLIPPED_ACRES,
ACTIVITY,
ACTIVITY_INFO,
"county",
"meridian",
"township",
"range",
"section",
"aliquot",
GEOMETRY,
]
70 changes: 68 additions & 2 deletions stlor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,18 @@
is_compatible_activity,
exclude_inactive,
capture_lessee_and_lease_type,
concatenate_activity_info,
)
from stlor.clip import clip_to_reservation_boundaries, filter_parcels_by_acreage
from stlor.config import STATE_ACTIVITIES
from stlor.constants import ACTIVITY_INFO, RIGHTS_TYPE, ACTIVITY
from stlor.constants import (
ACTIVITY_INFO,
ACTIVITY_INFO_2,
ACTIVITY,
FINAL_DATASET_COLUMNS,
OBJECT_ID,
RIGHTS_TYPE,
)
from stlor.entities import StateActivityDataSource
from stlor.overlap import tree_based_proximity
from stlor.utils import in_parallel, combine_delim_list
Expand Down Expand Up @@ -144,6 +153,22 @@ def remove_timber_rows(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
return gdf[gdf[RIGHTS_TYPE].str.lower() != "timber"]


def join_activity_info(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Join the activity_info and activity_info_2 columns into a single column.

    The combined text is computed row by row with concatenate_activity_info
    and stored in the activity_info column of a new frame that no longer has
    the activity_info_2 column. The frame passed in is left unmodified.

    Arguments:
    gdf -- the state trust lands GeoDataFrame

    Returns:
    gpd.GeoDataFrame -- the state trust lands GeoDataFrame with a cleaned
    activity_info column
    """
    # Build the combined values from the untouched input first, while both
    # source columns are still present.
    combined = gdf.apply(concatenate_activity_info, axis=1)

    # drop() returns a new frame, so assigning onto it neither mutates the
    # caller's data nor writes into a filtered slice of another frame.
    result = gdf.drop(ACTIVITY_INFO_2, axis=1)
    result[ACTIVITY_INFO] = combined

    return result


def main(activities_dir: Path, stl_path: Path, output_dir: Path):
"""Match state trust lands parcels to land use activities.
Expand Down Expand Up @@ -201,11 +226,52 @@ def main(activities_dir: Path, stl_path: Path, output_dir: Path):
logger.info(f"STLoR row count after removing timber parcels: {stl_gdf.shape[0]}")

# Write the output of the activity match process to disk.
logger.info(f"Final STLoR row count: {stl_gdf.shape[0]}")
logger.info("Writing activity match output to 03_ActivityMatch.{csv,xlsx,geojson}")
stl_gdf.to_csv(output_dir / "03_ActivityMatch.csv", index=False)
stl_gdf.to_excel(output_dir / "03_ActivityMatch.xlsx", index=False)
stl_gdf.to_file(output_dir / "03_ActivityMatch.geojson", driver="GeoJSON")

# Clip the state trust lands to reservation boundaries.
logger.info("Clipping state trust lands to reservation boundaries.")
start_time = datetime.now()
stl_gdf = clip_to_reservation_boundaries(stl_gdf)
logger.info(
f"Clipping state trust lands to reservation boundaries took {datetime.now() - start_time}"
)

# Write the clipped state trust lands to disk.
logger.info("Writing clipped state trust lands to 04_Clipped.{csv,xlsx,geojson}")
stl_gdf.to_csv(output_dir / "04_Clipped.csv", index=False)
stl_gdf.to_excel(output_dir / "04_Clipped.xlsx", index=False)
stl_gdf.to_file(output_dir / "04_Clipped.geojson", driver="GeoJSON")

# Filter parcels by acreage.
logger.info("Filtering parcels to those with acreage greater than 10.")
stl_gdf = filter_parcels_by_acreage(stl_gdf)

# Write the filtered state trust lands to disk.
logger.info(
"Writing filtered state trust lands to 05_AcreageGreaterThan10.{csv,xlsx,geojson}"
)
stl_gdf.to_csv(output_dir / "05_AcreageGreaterThan10.csv", index=False)
stl_gdf.to_excel(output_dir / "05_AcreageGreaterThan10.xlsx", index=False)
stl_gdf.to_file(output_dir / "05_AcreageGreaterThan10.geojson", driver="GeoJSON")

# Clean up the activity_info and object_id columns and select a subset of
# columns for the final dataset.
logger.info("Cleaning up activity_info and object_id columns.")
stl_gdf = join_activity_info(stl_gdf)
stl_gdf[OBJECT_ID] = stl_gdf["object_id_LAST"]
stl_gdf = stl_gdf[FINAL_DATASET_COLUMNS]

# Write the final dataset to disk.
logger.info("Writing final dataset to 06_STLoRs.{csv,xlsx,geojson}")
stl_gdf.to_csv(output_dir / "06_STLoRs.csv", index=False)
stl_gdf.to_excel(output_dir / "06_STLoRs.xlsx", index=False)
stl_gdf.to_file(output_dir / "06_STLoRs.geojson", driver="GeoJSON")

logger.info(f"Final STLoR row count: {stl_gdf.shape[0]}")


def run():
"""Run the activity match process."""
Expand Down

0 comments on commit 428c50c

Please sign in to comment.