Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
johnisom committed Dec 13, 2023
1 parent edf0070 commit ca2b40f
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 41 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
data-backups/
__pycache__/
26 changes: 7 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Wildfire Analyzer


## Installation

Install Git and Git LFS, then clone the GitHub repository to your computer.
Expand All @@ -15,30 +14,19 @@ Make sure you have 5 GB of free memory first.

## About

This project is a data analytics and prediction project that uses machine learning. It runs in jupyter notebooks.

The dataset I'm using is https://www.kaggle.com/datasets/rtatman/188-million-us-wildfires/data. I cleaned and formatted the data and selected only
that which I needed.
Reverse geocoding is made possible through the `pygris` python package. It is a package that downloads and reads the US Census Bureau's TIGRIS file on the shapes and locations of all counties and county equivalents in the USA, and I used that to place the location of latitude and longitude coordinates in specific counties, as part of my cleaning and formatting the data.

This is heavily work in progress and the requirements may change.
This is a data analytics and prediction project that uses machine learning. It runs locally as a Tk (tkinter) desktop application.

## Structure

stand alone app running on windows 10 with jupyter notebook and sqlite for data access.

## Requirements
## Credits

make the following descriptive (analytical) methods:
1. table showing ranking of counties that have the most fires and most acres burned by fires, per year.
a. alternatively, have it be a map with counties that are shaded according to number of fires (this is what i went with)
2. a pie chart that shows causes of wildfires in the US. the user can select an individual state if they wish, or all of the USA.
3. a confusion matrix to visualize the model's performance
### Wildfires Dataset

make the following non-descriptive (predictive) methods:
1. model that predicts fire size class based off of some parameters
a. suggested params are location (probably granular to county), date, reporting agency
The dataset I'm using is https://www.kaggle.com/datasets/rtatman/188-million-us-wildfires/data. I cleaned and formatted the data and kept only what I needed, which greatly reduced its size.

## TODOS
### County-level Mapping

There are 1,880,465 data points which need to be split into training and test sets.
Reverse geocoding is made possible through the `pygris` Python package.
It downloads and reads the US Census Bureau's TIGER/Line shapefiles, which describe the shapes and locations of all counties and county equivalents in the USA. I used it to assign each fire's latitude and longitude coordinates to a specific county as part of cleaning and formatting the data. It is also used to display the map under the "Plots" tab of this application.
10 changes: 9 additions & 1 deletion src/bindings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from math import inf
import matplotlib.pyplot as plt
from .plotting import plot_causes_of_fires_by_number_of_fires, plot_causes_of_fires_by_total_area_burned, plot_counties_by_number_of_fires, plot_counties_by_total_area_burned
from .plotting import plot_causes_of_fires_by_number_of_fires, plot_causes_of_fires_by_total_area_burned, plot_counties_by_number_of_fires, plot_counties_by_total_area_burned, plot_fipscode_model_confusion_matrix, plot_lonlat_model_confusion_matrix
from .myutils import add_fire_count_to_counties, add_acres_burned_to_counties
from .location_info import get_counties_geodf
from .fires_info import get_fires_dataframe
Expand Down Expand Up @@ -74,3 +74,11 @@ def plot_everything(keys=('lower48',), year_start=None, year_end=None):
plot_causes_of_fires_by_number_of_fires(fires_df, keys, fire_cause_counts_title)
plot_counties_by_number_of_fires(counties_geo_df, keys, fire_counts_title)
plt.show()

def plot_lonlat_confusion_matrix():
    """Draw the Longitude/Latitude model's confusion matrix and show it.

    The figure is built by ``plot_lonlat_model_confusion_matrix``; this
    wrapper then calls ``plt.show()`` so it is displayed.
    """
    plot_lonlat_model_confusion_matrix()
    plt.show()

def plot_fipscode_confusion_matrix():
    """Draw the State/County (FIPS code) model's confusion matrix and show it.

    The figure is built by ``plot_fipscode_model_confusion_matrix``; this
    wrapper then calls ``plt.show()`` so it is displayed.
    """
    plot_fipscode_model_confusion_matrix()
    plt.show()
4 changes: 1 addition & 3 deletions src/gui/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .welcome import WelcomeFrame
from .plots import PlotsFrame
from .predictions import PredictionsFrame
from .credits import CreditsFrame, LicenseFrame
from .license import LicenseFrame
from ..prediction import FIPS_ENCODER_PATH, FIPS_MODEL_PATH, LONLAT_MODEL_PATH
from .custom_widgets import Title

Expand All @@ -20,7 +20,6 @@ def __init__(self, enable_predictions=True, **kwargs):
notebook = ttk.Notebook(self, padding=2)
welcome_frame = WelcomeFrame(notebook)
plots_frame = PlotsFrame(notebook)
credits_frame = CreditsFrame(notebook)
license_frame = LicenseFrame(notebook)
notebook.add(welcome_frame, text=welcome_frame.title)
notebook.add(plots_frame, text=plots_frame.title)
Expand All @@ -31,7 +30,6 @@ def __init__(self, enable_predictions=True, **kwargs):
no_ml_message = 'The "joblib-objects" directory was unable to be found, or the files within don\'t match the registered name for the predictive models.\n' \
'Make sure to have git-lfs installed and pull the latest version of the repository.'
self.warn(title='Unable to load ML predictive models!', message=no_ml_message)
notebook.add(credits_frame, text=credits_frame.title)
notebook.add(license_frame, text=license_frame.title)

# Set up the grid
Expand Down
14 changes: 1 addition & 13 deletions src/gui/credits.py → src/gui/license.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,7 @@
from pathlib import Path
from tkinter import *
from tkinter import ttk
from .custom_widgets import NotebookFrame, Title, Subtitle

class CreditsFrame(NotebookFrame):
    """Notebook page that currently shows only a "Credits" heading."""

    # Text used for this page's tab in the notebook.
    title = 'Credits'

    def __init__(self, *args, **kwargs):
        """Build the frame and place its heading in the first grid cell."""
        super().__init__(*args, **kwargs)

        heading = Title(self, text='Credits')

        # Set items on the grid
        heading.grid(row=0, column=0, sticky=(N, E, W))
from .custom_widgets import NotebookFrame, Title

class LicenseFrame(NotebookFrame):
title = 'License'
Expand All @@ -31,4 +20,3 @@ def __init__(self, *args, **kwargs):
# Set items on the grid
title.grid(row=0, column=0, sticky=(N, E, W), pady='3')
license.grid(row=1, column=0, sticky=(S, E, W))

2 changes: 1 addition & 1 deletion src/gui/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def __init__(self, *args, **kwargs):
self.set_up_state_county_variables()

title = Title(self, text='Predictions')
subframe = ttk.LabelFrame(self, text='Predict cause of fire in giving some parameters')
subframe = ttk.LabelFrame(self, text='Predict cause of fire by giving some parameters')

input_frame = ttk.Frame(subframe, padding=5)
discovery_datetime_label = ttk.Label(input_frame, text='Discovery datetime: ')
Expand Down
5 changes: 2 additions & 3 deletions src/gui/welcome.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self, *args, **kwargs):

title = Title(self, text='Welcome')
intro_text = 'Welcome to Wildfire Analyzer! This application is a data analytics application for wildfires in the US.\n' \
'Other than this welcome page, there are 3 pages: Plots, Predictions, and Credits.'
'Other than this welcome page, there are 4 pages: Plots, Predictions, Credits, and License.'
intro_text_label = ttk.Label(self, text=intro_text, wraplength=630, justify=LEFT)
plots_subtitle = Subtitle(self, text='Plots')
plots_text = 'In the plots page you can plot pie charts of the causes of wildfires (such as lightning or arson) by number of fires started or by number of acres burned.\n' \
Expand All @@ -19,8 +19,7 @@ def __init__(self, *args, **kwargs):
plots_text_label = ttk.Label(self, text=plots_text, wraplength=630, justify=LEFT)
predictions_subtitle = Subtitle(self, text='Predictions')
predictions_text = 'Here you\'ll be able to predict the likely cause of a fire by giving the machine learning model some information about the fire: final size, county location, and date of discovery.\n' \
'The accuracy is 60.93% for the Longitude/Latitude model and 57.64% for the State/County model with 13 labels to choose from.\n' \
'That is actually rather good (random chance would be 7.69%).'
'The performance is 60.93% accuracy for the Longitude/Latitude model and 57.64% for the State/County model with 13 labels to choose from. That is actually rather good (random chance would be 7.69%).'
predictions_text_label = ttk.Label(self, text=predictions_text, wraplength=630, justify=LEFT)
credits_subtitle = Subtitle(self, text='Credits / License')
credits_text = 'The credits and license pages have more information about the author of the project, the license used, and credits for others where credits are due.'
Expand Down
29 changes: 29 additions & 0 deletions src/plotting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from sklearn.metrics import ConfusionMatrixDisplay
from .fires_info import get_fires_dataframe
from .location_info import get_state_fips_codes
from .prediction import get_fips_encoder, get_fips_model, get_lonlat_model, STAT_CAUSE_CODE_TO_DESCR

def plot_counties_by_number_of_fires(counties_geo_df, keys, plot_title):
fips_codes = get_state_fips_codes(keys)
Expand Down Expand Up @@ -57,3 +60,29 @@ def plot_causes_of_fires_by_total_area_burned(fires_df, keys, plot_title):
fig.supxlabel(f'(Total: {total_sum:,.0f} acres)')
fig.tight_layout()
return fig, ax

def plot_lonlat_model_confusion_matrix():
    """Plot the confusion matrix of the Longitude/Latitude prediction model.

    Loads the model and the fires dataframe, keeps the feature columns plus
    ``stat_cause_code``, drops rows with missing values, and compares the
    model's predictions against ``stat_cause_code - 1``. The matrix is
    normalized over the true labels and drawn on a new 12x8 figure.

    Returns:
        The ``(fig, ax)`` pair holding the plot.
    """
    classifier = get_lonlat_model()
    wanted_columns = [
        'fire_size',
        'longitude',
        'latitude',
        'discovery_datetime',
        'contained_datetime',
        'stat_cause_code',
    ]
    # NOTE(review): every complete row of the fires dataframe is scored here;
    # whether that includes rows the model was trained on is not visible
    # from this function -- confirm.
    samples = get_fires_dataframe()[wanted_columns].dropna()
    features = samples.drop('stat_cause_code', axis=1)
    # Targets are the cause codes shifted down by one.
    targets = samples['stat_cause_code'] - 1

    fig, ax = plt.subplots(figsize=[12, 8])
    predicted = classifier.predict(features)
    ConfusionMatrixDisplay.from_predictions(
        y_true=targets,
        y_pred=predicted,
        normalize='true',
        xticks_rotation='vertical',
        display_labels=STAT_CAUSE_CODE_TO_DESCR.values(),
        values_format='.2f',
        ax=ax,
    )
    ax.set_title('Confusion Matrix for the Longitude/Latitude prediction model.')
    fig.tight_layout()
    return fig, ax

def plot_fipscode_model_confusion_matrix():
    """Plot the confusion matrix of the State/County prediction model.

    Loads the FIPS encoder, the model and the fires dataframe, keeps the
    feature columns plus ``stat_cause_code``, drops rows with missing values,
    and replaces ``combined_fips_code`` with the encoder's transform of its
    integer-cast values. Predictions are compared against
    ``stat_cause_code - 1``; the matrix is normalized over the true labels
    and drawn on a new 10x10 figure.

    Returns:
        The ``(fig, ax)`` pair holding the plot.
    """
    fips_encoder = get_fips_encoder()
    classifier = get_fips_model()
    wanted_columns = [
        'fire_size',
        'combined_fips_code',
        'discovery_datetime',
        'contained_datetime',
        'stat_cause_code',
    ]
    # NOTE(review): every complete row of the fires dataframe is scored here;
    # whether that includes rows the model was trained on is not visible
    # from this function -- confirm.
    samples = get_fires_dataframe()[wanted_columns].dropna()
    # Replace the FIPS codes with their encoded form before predicting.
    encoded_fips = fips_encoder.transform(samples['combined_fips_code'].astype(int))
    samples.loc[:, ['combined_fips_code']] = encoded_fips
    features = samples.drop('stat_cause_code', axis=1)
    # Targets are the cause codes shifted down by one.
    targets = samples['stat_cause_code'] - 1

    fig, ax = plt.subplots(figsize=[10, 10])
    predicted = classifier.predict(features)
    ConfusionMatrixDisplay.from_predictions(
        y_true=targets,
        y_pred=predicted,
        normalize='true',
        xticks_rotation='vertical',
        display_labels=STAT_CAUSE_CODE_TO_DESCR.values(),
        values_format='.2f',
        ax=ax,
    )
    ax.set_title('Confusion Matrix for the State/County prediction model.')
    fig.tight_layout()
    return fig, ax

0 comments on commit ca2b40f

Please sign in to comment.