
Cached FIM - Part 1b - Deployment Fixes (#622)
A few minor fixes to the lambda functions here that I apparently missed in the last PR, as well as the requirements.txt issue we worked out together.

I'll keep monitoring the pipelines throughout the day today, and add any
additional fixes here as well.
TylerSchrag-NOAA authored Jan 31, 2024
1 parent 97e58a2 commit 8243437
Showing 11 changed files with 42 additions and 35 deletions.
@@ -3,3 +3,4 @@ geopandas==0.11.1
SQLAlchemy==1.4.40
GeoAlchemy2==0.12.3
psycopg2-binary==2.9.3
+pandas==2.1.4
@@ -47,4 +47,5 @@ SELECT
fim.note
FROM {db_fim_table}_zero_stage AS fim
LEFT OUTER JOIN fim_cache.hand_hydrotable_cached_zero_stage AS hczs ON fim.hand_id = hczs.hand_id
-WHERE hczs.rc_discharge_cms IS NULL;
+WHERE hczs.rc_discharge_cms IS NULL
+AND fim.rc_discharge_cms IS NOT NULL;
@@ -86,9 +86,8 @@ def lambda_handler(event, context):
if os.path.exists(os.path.join("fim_configs", sql_file + '.sql')): #if there is product-specific fim_configs sql file, use it.
    sql_files_to_run.append({"sql_file":sql_file, "folder": "fim_configs", "db_type":db_type})
else: # if not, use the fim_publish_template
-    folder = 'fim_caching_templates'
-    sql_file = '4_create_fim_config_publish_table'
-    sql_files_to_run.append({"sql_file":sql_file, "folder": folder, "db_type":db_type, "check_dependencies": False})
+    sql_templates_to_run = event['sql_templates_to_run']
+    sql_files_to_run.extend(sql_templates_to_run)

##########################################################

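With this change, the fallback publish templates are no longer hard-coded in the Lambda; they are read from the sql_templates_to_run key of the event payload and appended to sql_files_to_run. As an illustration only, an event entry mirroring the dictionary keys used by the removed lines might look like the sketch below (the "viz" db_type value is a placeholder assumption, not taken from this diff):

# Hypothetical event payload for the Lambda above; keys mirror the removed hard-coded entry.
event = {
    "sql_templates_to_run": [
        {
            "sql_file": "4_create_fim_config_publish_table",  # template name from the removed code
            "folder": "fim_caching_templates",                # template folder from the removed code
            "db_type": "viz",                                 # placeholder value; set per deployment
            "check_dependencies": False
        }
    ]
}
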
@@ -13,7 +13,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -36,8 +35,8 @@ SELECT
buildings.prop_st as state,
max(fim.streamflow_cfs) AS max_flow_cfs,
avg(fim.streamflow_cfs) AS avg_flow_cfs,
-max(fim.rc_stage_ft) AS max_fim_stage_ft,
-avg(fim.rc_stage_ft) AS avg_fim_stage_ft,
+max(fim.fim_stage_ft) AS max_fim_stage_ft,
+avg(fim.fim_stage_ft) AS avg_fim_stage_ft,
count(buildings.build_id) AS buildings_impacted,
sum(buildings.sqfeet) AS building_sqft_impacted,
sum(CASE WHEN buildings.occ_cls = 'Agriculture' THEN 1 ELSE 0 END) AS bldgs_agriculture,
@@ -66,8 +65,8 @@ SELECT
TO_CHAR(hucs.huc10, 'fm0000000000') AS huc10_str,
max(fim.streamflow_cfs) AS max_flow_cfs,
avg(fim.streamflow_cfs) AS avg_flow_cfs,
-max(fim.rc_stage_ft) AS max_fim_stage_ft,
-avg(fim.rc_stage_ft) AS avg_fim_stage_ft,
+max(fim.fim_stage_ft) AS max_fim_stage_ft,
+avg(fim.fim_stage_ft) AS avg_fim_stage_ft,
count(buildings.build_id) AS buildings_impacted,
sum(buildings.sqfeet) AS building_sqft_impacted,
sum(CASE WHEN buildings.occ_cls = 'Agriculture' THEN 1 ELSE 0 END) AS bldgs_agriculture,
@@ -13,7 +13,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -13,7 +13,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -14,7 +14,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -13,7 +13,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -13,7 +13,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -13,7 +13,6 @@ SELECT
buildings.prod_date,
buildings.source,
buildings.val_method,
-fim.hydro_id,
flows.hydro_id,
flows.hydro_id::TEXT AS hydro_id_str,
flows.feature_id,
@@ -138,30 +138,43 @@ def convert_folder_csvs_to_geospatial(folder_path, output_format='gpkg', clip_to
# this up, see the Configuring AWS CLI section of the Hydrovis viz Guide at https://docs.google.com/document/d/1UIbAQycG-mWw5XwDPDunkQED5O96YtsbrOA4MMZ9zmA/edit?usp=sharing

########## Specify your Args Here #############
-sso_profile = None # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
-bucket_name = 'hydrovis-ti-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
-start_date = date(2022, 9, 17)
-end_date = date(2022, 9, 20)
-reference_times = ["1200"]
-include_files_with = ["ref", "prvi"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
-skip_files_with = [] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
+sso_profile = "prod" # The name of the AWS SSO profile you created, or set to None if you want to pull from the current environment of an EC2 machine (see notes above)
+bucket_name = 'hydrovis-prod-fim-us-east-1' # Set this based on the hydrovis environment you are pulling from, e.g. 'hydrovis-ti-fim-us-east-1', 'hydrovis-uat-fim-us-east-1', 'hydrovis-prod-fim-us-east-1'
+include_files_with = ["ana_inundation"] # Anything you want to be included when filtering S3 files e.g ["ana", "mrf"] or ["mrf_"]
+skip_files_with = ["counties", "hucs", "building", "_hi.csv", "_prvi", "_public", "_src_skill"] # Anything you want to be skipped when filtering S3 files e.g. ["ana_streamflow", "rapid_onset_flooding"]
clip_to_states = [] # Provide a list of state abbreviations to clip to set states, e.g. ["AL", "GA", "MS"]
output_format = "gpkg" # Set to gpkg or shp - Can add any OGR formats, with some tweaks to the file_format logic in the functions above. BEWARE - large FIM files can be too large for shapefiles, and results may be truncated.
-output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests" # Directory where you want output files saved.
+output_dir = r"C:\Users\arcgis\Desktop\Dev\VPP Data Requests\AEP_2_1" # Directory where you want output files saved.
overwrite = False # This will automatically skip files that have already been downloaded and/or converted when running the script when set to False (default).
delete_csv = True # This will delete the csv files after conversion
###############################################

-# Loop through days/hours specified
-for day in daterange(start_date, end_date):
-    ref_date = day.strftime("%Y%m%d")
-    for reference_time in reference_times:
-        folder_name = f"viz_cache/{ref_date}/{reference_time}/"
-        destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
-        # Download files from S3
-        print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
-        download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=False, output_format=output_format)
-        # Convert to geospatial (clip to states as well, if desired)
-        convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)
+events = [
+    {"start_date": date(2023, 12, 21), "end_date": date(2023, 12, 21), "reference_times": ["0900", "1000"]},
+    {"start_date": date(2023, 12, 16), "end_date": date(2023, 12, 17), "reference_times": ["1200"]},
+    {"start_date": date(2023, 12, 17), "end_date": date(2023, 12, 17), "reference_times": ["1300", "1400"]},
+    {"start_date": date(2023, 12, 5), "end_date": date(2023, 12, 5), "reference_times": ["1400", "1500"]},
+    {"start_date": date(2023, 12, 2), "end_date": date(2023, 12, 2), "reference_times": ["1300", "1400"]},
+    {"start_date": date(2023, 10, 28), "end_date": date(2023, 10, 28), "reference_times": ["0900", "1000"]},
+    {"start_date": date(2023, 10, 24), "end_date": date(2023, 10, 24), "reference_times": ["1600", "1700"]},
+    {"start_date": date(2023, 10, 4), "end_date": date(2023, 10, 4), "reference_times": ["2100", "2200"]},
+    {"start_date": date(2023, 10, 3), "end_date": date(2023, 10, 3), "reference_times": ["2200", "2300"]}
+]

+###############################################
+for event in events:
+    start_date = event['start_date']
+    end_date = event['end_date']
+    reference_times = event['reference_times']
+    # Loop through days/hours specified
+    for day in daterange(start_date, end_date):
+        ref_date = day.strftime("%Y%m%d")
+        for reference_time in reference_times:
+            folder_name = f"viz_cache/{ref_date}/{reference_time}/"
+            destination_dir = fr"{output_dir}\{ref_date}\{reference_time}"
+            # Download files from S3
+            print(f"Searching Viz Cache for /{ref_date}/{reference_time}/ with files including {include_files_with} and not including {skip_files_with}.")
+            download_files_from_s3(bucket_name, folder_name, destination_dir, sso_profile, include_files_with=include_files_with, skip_files_with=skip_files_with, overwrite=False, output_format=output_format)
+            # Convert to geospatial (clip to states as well, if desired)
+            convert_folder_csvs_to_geospatial(destination_dir, output_format=output_format, clip_to_states=clip_to_states, delete_csv=delete_csv)

print(f"Finished in {round(time.time()-start,0)/60} minutes")
