Cached FIM - Part 1b - Deployment Fixes #622

Merged: 5 commits, Jan 31, 2024
Changes from all commits

@@ -3,3 +3,4 @@ geopandas==0.11.1
 SQLAlchemy==1.4.40
 GeoAlchemy2==0.12.3
 psycopg2-binary==2.9.3
+pandas==2.1.4

@@ -47,4 +47,5 @@ SELECT
     fim.note
 FROM {db_fim_table}_zero_stage AS fim
 LEFT OUTER JOIN fim_cache.hand_hydrotable_cached_zero_stage AS hczs ON fim.hand_id = hczs.hand_id
-WHERE hczs.rc_discharge_cms IS NULL;
+WHERE hczs.rc_discharge_cms IS NULL
+AND fim.rc_discharge_cms IS NOT NULL;
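
This template now excludes zero-stage records whose own rc_discharge_cms is NULL, so only rows that actually carry a discharge value are treated as cache misses. As a rough illustration of how such a template is typically rendered and run (a sketch only: the str.format rendering, the get_uncached_records helper, and the "ingest.ana_inundation" table name are assumptions, not taken from this PR; psycopg2 itself is pinned in the requirements change above):

import psycopg2

# The cache-miss template from the hunk above, reduced to one column for brevity.
QUERY_TEMPLATE = """
SELECT fim.hand_id
FROM {db_fim_table}_zero_stage AS fim
LEFT OUTER JOIN fim_cache.hand_hydrotable_cached_zero_stage AS hczs
    ON fim.hand_id = hczs.hand_id
WHERE hczs.rc_discharge_cms IS NULL
AND fim.rc_discharge_cms IS NOT NULL;
"""

def get_uncached_records(conn_kwargs, db_fim_table="ingest.ana_inundation"):
    # "ingest.ana_inundation" is a hypothetical table name for illustration.
    query = QUERY_TEMPLATE.format(db_fim_table=db_fim_table)
    with psycopg2.connect(**conn_kwargs) as conn:
        with conn.cursor() as cur:
            cur.execute(query)
            return cur.fetchall()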

@@ -86,9 +86,8 @@ def lambda_handler(event, context):
     if os.path.exists(os.path.join("fim_configs", sql_file + '.sql')): #if there is product-specific fim_configs sql file, use it.
         sql_files_to_run.append({"sql_file":sql_file, "folder": "fim_configs", "db_type":db_type})
     else: # if not, use the fim_publish_template
-        folder = 'fim_caching_templates'
-        sql_file = '4_create_fim_config_publish_table'
-        sql_files_to_run.append({"sql_file":sql_file, "folder": folder, "db_type":db_type, "check_dependencies": False})
+        sql_templates_to_run = event['sql_templates_to_run']
+        sql_files_to_run.extend(sql_templates_to_run)

 ##########################################################
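
With this change, the fallback branch no longer hard-codes the publish template; the handler reads the template list from the invoking event and extends sql_files_to_run with it. A sketch of what such an event might carry, built from the dictionary keys this handler already uses (the "viz" db_type value is an assumption; the template name and folder are the values the old hard-coded branch used):

# Hypothetical invocation event for the updated lambda_handler.
event = {
    "sql_templates_to_run": [
        {
            "sql_file": "4_create_fim_config_publish_table",  # previously hard-coded
            "folder": "fim_caching_templates",                # previously hard-coded
            "db_type": "viz",                                 # assumed value
            "check_dependencies": False,
        }
    ],
    # ...plus whatever other fields the handler expects upstream of this branch.
}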

@@ -13,7 +13,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,
@@ -36,8 +35,8 @@ SELECT
     buildings.prop_st as state,
     max(fim.streamflow_cfs) AS max_flow_cfs,
     avg(fim.streamflow_cfs) AS avg_flow_cfs,
-    max(fim.rc_stage_ft) AS max_fim_stage_ft,
-    avg(fim.rc_stage_ft) AS avg_fim_stage_ft,
+    max(fim.fim_stage_ft) AS max_fim_stage_ft,
+    avg(fim.fim_stage_ft) AS avg_fim_stage_ft,
     count(buildings.build_id) AS buildings_impacted,
     sum(buildings.sqfeet) AS building_sqft_impacted,
     sum(CASE WHEN buildings.occ_cls = 'Agriculture' THEN 1 ELSE 0 END) AS bldgs_agriculture,
@@ -66,8 +65,8 @@ SELECT
     TO_CHAR(hucs.huc10, 'fm0000000000') AS huc10_str,
     max(fim.streamflow_cfs) AS max_flow_cfs,
     avg(fim.streamflow_cfs) AS avg_flow_cfs,
-    max(fim.rc_stage_ft) AS max_fim_stage_ft,
-    avg(fim.rc_stage_ft) AS avg_fim_stage_ft,
+    max(fim.fim_stage_ft) AS max_fim_stage_ft,
+    avg(fim.fim_stage_ft) AS avg_fim_stage_ft,
     count(buildings.build_id) AS buildings_impacted,
     sum(buildings.sqfeet) AS building_sqft_impacted,
     sum(CASE WHEN buildings.occ_cls = 'Agriculture' THEN 1 ELSE 0 END) AS bldgs_agriculture,

@@ -13,7 +13,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,

@@ -13,7 +13,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,

@@ -14,7 +14,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,

@@ -13,7 +13,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,

@@ -13,7 +13,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,

@@ -13,7 +13,6 @@ SELECT
     buildings.prod_date,
     buildings.source,
     buildings.val_method,
-    fim.hydro_id,
     flows.hydro_id,
     flows.hydro_id::TEXT AS hydro_id_str,
     flows.feature_id,

@@ -138,30 +138,43 @@ def convert_folder_csvs_to_geospatial(folder_path, output_format='gpkg', clip_to
 # this up, see the Configuring AWS CLI section of the Hydrovis viz Guide at https://docs.google.com/document/d/1UIbAQycG-mWw5XwDPDunkQED5O96YtsbrOA4MMZ9zmA/edit?usp=sharing

 ########## Specify your Args Here #############
-sso_profile = None # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
-bucket_name = 'hydrovis-ti-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
-start_date = date(2022, 9, 17)
-end_date = date(2022, 9, 20)
-reference_times = ["1200"]
-include_files_with = ["ref", "prvi"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
-skip_files_with = [] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
+sso_profile = "prod" # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
+bucket_name = 'hydrovis-prod-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
+include_files_with = ["ana_inundation"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
+skip_files_with = ["counties", "hucs", "building", "_hi.csv", "_prvi", "_public", "_src_skill"] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
 clip_to_states = [] # Provide a list of state abbreviations to clip to set states, e.g. ["AL", "GA", "MS"]
 output_format = "gpkg" # Set to gpkg or shp - Can add any OGR formats, with some tweaks to the file_format logic in the functions above. BEWARE - large FIM files can be too large for shapefiles, and results may be truncated.
-output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests" # Directory where you want output files saved.
+output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests\AEP_2_1" # Directory where you want output files saved.
 overwrite = False # This will automatically skip files that have already been downloaded and/or converted when running the script when set to False (default).
 delete_csv = True # This will delete the csv files after conversion
 ###############################################

-# Loop through days/hours specified
-for day in daterange(start_date, end_date):
-    ref_date = day.strftime("%Y%m%d")
-    for reference_time in reference_times:
-        folder_name = f"viz_cache/{ref_date}/{reference_time}/"
-        destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
-        # Download files from S3
-        print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
-        download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=False, output_format=output_format)
-        # Convert to geospatial (clip to states as well, if desired)
-        convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)
+events = [
+    {"start_date": date(2023, 12, 21), "end_date": date(2023, 12, 21), "reference_times": ["0900", "1000"]},
+    {"start_date": date(2023, 12, 16), "end_date": date(2023, 12, 17), "reference_times": ["1200"]},
+    {"start_date": date(2023, 12, 17), "end_date": date(2023, 12, 17), "reference_times": ["1300", "1400"]},
+    {"start_date": date(2023, 12, 5), "end_date": date(2023, 12, 5), "reference_times": ["1400", "1500"]},
+    {"start_date": date(2023, 12, 2), "end_date": date(2023, 12, 2), "reference_times": ["1300", "1400"]},
+    {"start_date": date(2023, 10, 28), "end_date": date(2023, 10, 28), "reference_times": ["0900", "1000"]},
+    {"start_date": date(2023, 10, 24), "end_date": date(2023, 10, 24), "reference_times": ["1600", "1700"]},
+    {"start_date": date(2023, 10, 4), "end_date": date(2023, 10, 4), "reference_times": ["2100", "2200"]},
+    {"start_date": date(2023, 10, 3), "end_date": date(2023, 10, 3), "reference_times": ["2200", "2300"]}
+]
+
+###############################################
+for event in events:
+    start_date = event['start_date']
+    end_date = event['end_date']
+    reference_times = event['reference_times']
+    # Loop through days/hours specified
+    for day in daterange(start_date, end_date):
+        ref_date = day.strftime("%Y%m%d")
+        for reference_time in reference_times:
+            folder_name = f"viz_cache/{ref_date}/{reference_time}/"
+            destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
+            # Download files from S3
+            print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
+            download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=False, output_format=output_format)
+            # Convert to geospatial (clip to states as well, if desired)
+            convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)

 print(f"Finished in {round(time.time()-start,0)/60} minutes")
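
One note on the loop above: it depends on a daterange helper defined earlier in this script but not shown in the diff. A minimal sketch of the implementation the loop appears to assume (inclusive of end_date, since several events use the same start and end date and must still yield one day; the repo's actual helper may differ):

from datetime import date, timedelta

def daterange(start_date, end_date):
    # Yield each date from start_date through end_date, inclusive.
    for n in range((end_date - start_date).days + 1):
        yield start_date + timedelta(days=n)

# Example: a single-day event yields exactly one date.
assert list(daterange(date(2023, 12, 21), date(2023, 12, 21))) == [date(2023, 12, 21)]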