Skip to content

Commit

Permalink
Refactor to remove dependency on WRDS API
Browse files Browse the repository at this point in the history
  • Loading branch information
shawncrawley committed Aug 5, 2024
1 parent ac3e6fc commit 08d4a24
Show file tree
Hide file tree
Showing 16 changed files with 512 additions and 801 deletions.
25 changes: 7 additions & 18 deletions Core/EC2/DataServices/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,6 @@ variable "private_route_53_zone" {
# THIS TF CONFIG IS DEPENDENT ON AN SSH KEY THAT CAN ACCESS THE WRDS VLAB REPOS
locals {
ssh_key_filename = "id_ed25519"
instance_name = "hv-vpp-${var.environment}-data-services"
instance_names = [local.instance_name, format("%s-for-tests", local.instance_name)]
logging_application_name = "data_services"
logging_application_names = [local.logging_application_name, format("test_%s", local.logging_application_name)]
location_db_names = [var.location_db_name, format("%s_ondeck", var.location_db_name)]
cloudinit_config_data = {
write_files = [
{
Expand All @@ -99,7 +94,7 @@ locals {
permissions = "0777"
owner = "ec2-user:ec2-user"
content = templatefile("${path.module}/templates/env/location.env.tftpl", {
db_name = "$${location_db_name}}"
db_name = var.location_db_name
db_username = jsondecode(var.location_credentials_secret_string)["username"]
db_password = jsondecode(var.location_credentials_secret_string)["password"]
db_host = var.rds_host
Expand Down Expand Up @@ -131,7 +126,7 @@ locals {
owner = "ec2-user:ec2-user"
content = templatefile("${path.module}/templates/env/forecast.env.tftpl", {
db_name = var.forecast_db_name
location_db_name = "$${location_db_name}}"
location_db_name = var.location_db_name
db_username = jsondecode(var.forecast_credentials_secret_string)["username"]
db_password = jsondecode(var.forecast_credentials_secret_string)["password"]
db_host = var.rds_host
Expand Down Expand Up @@ -163,7 +158,6 @@ locals {

# Writes the ssh key, .env files, and docker-compose.yml files to EC2 and starts the startup.sh
data "cloudinit_config" "startup" {
count = 2
gzip = false
base64_encode = false

Expand All @@ -172,7 +166,7 @@ data "cloudinit_config" "startup" {
filename = "cloud-config.yaml"
content = <<-END
#cloud-config
${jsonencode(jsondecode(replace(tostring(jsonencode(local.cloudinit_config_data)), "$${location_db_name}}", tostring(local.location_db_names[count.index]))))}
${jsonencode(local.cloudinit_config_data)}
END
}

Expand All @@ -185,15 +179,14 @@ data "cloudinit_config" "startup" {
location_api_3_0_commit = var.data_services_versions["location_api_3_0_commit"]
forecast_api_2_0_commit = var.data_services_versions["forecast_api_2_0_commit"]
ssh_key_filename = local.ssh_key_filename
logging_application_name = local.logging_application_names[count.index]
logging_application_name = "data_services"
instance = count.index
})
}
}

# EC2 Related Resources
resource "aws_instance" "data_services" {
count = 2
ami = data.aws_ami.linux.id
iam_instance_profile = var.ec2_instance_profile_name
instance_type = "c5.xlarge"
Expand All @@ -207,12 +200,12 @@ resource "aws_instance" "data_services" {
}

root_block_device {
encrypted = count.index == 0
kms_key_id = count.index == 0 ? var.kms_key_arn : ""
encrypted = true
kms_key_id = var.kms_key_arn
}

tags = {
Name = local.instance_names[count.index]
Name = "hv-vpp-${var.environment}-data-services"
OS = "Linux"
}

Expand Down Expand Up @@ -245,7 +238,3 @@ data "aws_ami" "linux" {
# DNS name of the Route53 record pointing at the data services instance.
output "dns_name" {
value = aws_route53_record.hydrovis.name
}

output "dataservices-test-instance-id" {
value = aws_instance.data_services[1].id
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ elif [ "$db_instance_tag" == "viz" ]; then
fi

aws s3 cp $s3_uri /tmp/db.sql.gz && \
psql -c "DROP DATABASE IF EXISTS $db_name;" && \
psql -c "CREATE DATABASE $db_name;" && \
cat /tmp/db.sql.gz | gunzip | psql $db_name && \
rm /tmp/db.sql.gz
Expand Down
13 changes: 13 additions & 0 deletions Core/LAMBDA/layers/viz_lambda_shared_funcs/python/viz_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,19 @@ def run_sql_file_in_db(self, sql_file):
except Exception as e:
raise e
self.connection.close()

###################################
def execute_sql(self, sql):
    """Execute a SQL string (or the contents of a .sql file) against the database.

    Args:
        sql: Either a literal SQL statement, or a path to a .sql file. When the
             value ends in '.sql' and that path exists on disk, the file's
             contents are read and executed instead of the raw string.

    NOTE: mirrors run_sql_file_in_db above — the connection is closed after
    execution, so this instance cannot run further SQL without reconnecting.
    """
    # Treat the argument as a file path when it points at an existing .sql file.
    if sql.endswith('.sql') and os.path.exists(sql):
        # Context manager fixes the leaked file handle from open(...).read().
        with open(sql, 'r') as sql_file:
            sql = sql_file.read()
    # 'with' on the connection commits on success and rolls back on error;
    # the former try/except that only re-raised has been dropped (it was a no-op).
    with self.connection:
        with self.connection.cursor() as cur:
            print(f"---> Running provided SQL:\n{sql}")
            cur.execute(sql)
    self.connection.close()

###################################
def run_sql_in_db(self, sql, return_geodataframe=False):
Expand Down
172 changes: 83 additions & 89 deletions Core/LAMBDA/viz_functions/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ variable "db_lambda_security_groups" {
type = list(any)
}

variable "nat_sg_group" {
type = string
}

variable "db_lambda_subnets" {
description = "Subnets to use for the db-pipeline lambdas."
type = list(any)
Expand Down Expand Up @@ -119,11 +115,26 @@ variable "viz_db_user_secret_string" {
type = string
}

# NOTE(review): the variable name contains typos ("suser_seceret" — intended
# "superuser_secret") but is kept as-is because callers pass it by this exact name.
variable "viz_db_suser_seceret_string" {
  description = "The secret string of the viz processing database superuser to write/read data as."
  type        = string
}

# Secret string (JSON, presumably with "username"/"password" keys as used by
# the other credential variables in this file) for the EGIS RDS database.
variable "egis_db_user_secret_string" {
description = "The secret string for the egis rds database."
type = string
}

# FIX: description was copy-pasted from the viz host variable; this is the WRDS host.
variable "wrds_db_host" {
  description = "Hostname of the WRDS RDS instance."
  type        = string
}

# FIX: description was copy-pasted from the viz_processing credential variable.
variable "wrds_db_user_secret_string" {
  description = "The secret string of the WRDS database user to write/read data as."
  type        = string
}

variable "egis_portal_password" {
description = "The password for the egis portal user to publish as."
type = string
Expand Down Expand Up @@ -169,10 +180,6 @@ variable "viz_lambda_shared_funcs_layer" {
type = string
}

variable "dataservices_host" {
type = string
}

# Added a description for consistency with the other variables in this module.
variable "viz_pipeline_step_function_arn" {
  description = "ARN of the viz pipeline Step Function state machine."
  type        = string
}
Expand Down Expand Up @@ -207,85 +214,6 @@ locals {
])
}

########################################################################################################################################
########################################################################################################################################

###############################
## WRDS API Handler Function ##
###############################
data "archive_file" "wrds_api_handler_zip" {
type = "zip"

source_file = "${path.module}/viz_wrds_api_handler/lambda_function.py"

output_path = "${path.module}/temp/viz_wrds_api_handler_${var.environment}_${var.region}.zip"
}

resource "aws_s3_object" "wrds_api_handler_zip_upload" {
bucket = var.deployment_bucket
key = "terraform_artifacts/${path.module}/viz_wrds_api_handler.zip"
source = data.archive_file.wrds_api_handler_zip.output_path
source_hash = filemd5(data.archive_file.wrds_api_handler_zip.output_path)
}

resource "aws_lambda_function" "viz_wrds_api_handler" {
function_name = "hv-vpp-${var.environment}-viz-wrds-api-handler"
description = "Lambda function to ping WRDS API and format outputs for processing."
memory_size = 512
timeout = 900
vpc_config {
security_group_ids = [var.nat_sg_group]
subnet_ids = var.db_lambda_subnets
}
environment {
variables = {
DATASERVICES_HOST = var.dataservices_host
PYTHON_PREPROCESSING_BUCKET = var.python_preprocessing_bucket
PROCESSED_OUTPUT_PREFIX = "max_stage/ahps"
INITIALIZE_PIPELINE_FUNCTION = aws_lambda_function.viz_initialize_pipeline.arn
}
}
s3_bucket = aws_s3_object.wrds_api_handler_zip_upload.bucket
s3_key = aws_s3_object.wrds_api_handler_zip_upload.key
source_code_hash = filebase64sha256(data.archive_file.wrds_api_handler_zip.output_path)
runtime = "python3.9"
handler = "lambda_function.lambda_handler"
role = var.lambda_role
layers = [
var.arcgis_python_api_layer,
var.es_logging_layer,
var.viz_lambda_shared_funcs_layer
]
tags = {
"Name" = "hv-vpp-${var.environment}-viz-wrds-api-handler"
}
}

resource "aws_cloudwatch_event_target" "check_lambda_every_five_minutes" {
rule = var.five_minute_trigger.name
target_id = aws_lambda_function.viz_initialize_pipeline.function_name
arn = aws_lambda_function.viz_initialize_pipeline.arn
input = "{\"configuration\":\"rfc\"}"
}

resource "aws_lambda_permission" "allow_cloudwatch_to_call_check_lambda" {
statement_id = "AllowExecutionFromCloudWatch"
action = "lambda:InvokeFunction"
function_name = aws_lambda_function.viz_wrds_api_handler.function_name
principal = "events.amazonaws.com"
source_arn = var.five_minute_trigger.arn
}

resource "aws_lambda_function_event_invoke_config" "viz_wrds_api_handler" {
function_name = resource.aws_lambda_function.viz_wrds_api_handler.function_name
maximum_retry_attempts = 0
destination_config {
on_failure {
destination = var.email_sns_topics["viz_lambda_errors"].arn
}
}
}

##################################
## EGIS Health Checker Function ##
##################################
Expand Down Expand Up @@ -970,6 +898,72 @@ resource "aws_lambda_function_event_invoke_config" "viz_publish_service_destinat
}
}


######################
## VIZ TEST WRDS DB ##
######################
# Bundles the test lambda's handler together with every .sql file found under
# this module, so the function can run them against the on-deck WRDS database.
data "archive_file" "viz_test_wrds_db_zip" {
type = "zip"
output_path = "${path.module}/temp/test_sql_${var.environment}_${var.region}.zip"

source {
content = file("${path.module}/viz_test_wrds_db/lambda_function.py")
filename = "lambda_function.py"
}

# One source block per .sql file anywhere under the module.
# NOTE(review): basename() flattens directory structure — two .sql files with
# the same name in different folders would collide in the zip; verify names are unique.
dynamic "source" {
for_each = fileset("${path.module}", "**/*.sql")
content {
content = file("${path.module}/${source.key}")
filename = basename(source.key)
}
}
}

# Uploads the test-wrds-db lambda bundle to the deployment bucket.
resource "aws_s3_object" "viz_test_wrds_db_upload" {
  bucket = var.deployment_bucket
  # BUG FIX: key previously reused "viz_update_egis_data.zip" (copy/paste),
  # which would clobber the viz_update_egis_data artifact at the same S3 key.
  key         = "terraform_artifacts/${path.module}/viz_test_wrds_db.zip"
  source      = data.archive_file.viz_test_wrds_db_zip.output_path
  source_hash = filemd5(data.archive_file.viz_test_wrds_db_zip.output_path)
}

# Lambda that validates the wrds_location3_ondeck database before it is swapped
# in for the live copy. Runs inside the VPC so it can reach both the WRDS and
# viz RDS instances.
resource "aws_lambda_function" "viz_test_wrds_db" {
function_name = "hv-vpp-${var.environment}-viz-test-wrds-db"
description = "Lambda function to test the wrds_location3_ondeck db before it is swapped for the live version"
# Max lambda timeout (15 min) and generous memory — the test SQL presumably
# scans large tables; TODO confirm sizing against actual runtimes.
timeout = 900
memory_size = 5000
vpc_config {
security_group_ids = var.db_lambda_security_groups
subnet_ids = var.db_lambda_subnets
}
environment {
variables = {
WRDS_DB_HOST = var.wrds_db_host
WRDS_DB_USERNAME = jsondecode(var.wrds_db_user_secret_string)["username"]
WRDS_DB_PASSWORD = jsondecode(var.wrds_db_user_secret_string)["password"]
VIZ_DB_DATABASE = var.viz_db_name
VIZ_DB_HOST = var.viz_db_host
# NOTE: "suser_seceret" typo is in the variable's declared name; intentional here.
VIZ_DB_USERNAME = jsondecode(var.viz_db_suser_seceret_string)["username"]
VIZ_DB_PASSWORD = jsondecode(var.viz_db_suser_seceret_string)["password"]
}
}
s3_bucket = aws_s3_object.viz_test_wrds_db_upload.bucket
s3_key = aws_s3_object.viz_test_wrds_db_upload.key
source_code_hash = filebase64sha256(data.archive_file.viz_test_wrds_db_zip.output_path)
runtime = "python3.9"
handler = "lambda_function.lambda_handler"
role = var.lambda_role
layers = [
var.psycopg2_sqlalchemy_layer,
var.viz_lambda_shared_funcs_layer
]
tags = {
"Name" = "hv-vpp-${var.environment}-viz-test-wrds-db"
}
}



#########################
## Image Based Lambdas ##
#########################
Expand Down Expand Up @@ -1037,8 +1031,8 @@ output "publish_service" {
value = aws_lambda_function.viz_publish_service
}

output "wrds_api_handler" {
value = aws_lambda_function.viz_wrds_api_handler
output "test_wrds_db" {
value = aws_lambda_function.viz_test_wrds_db
}

output "egis_health_checker" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ DROP TABLE IF EXISTS cache.rfc_categorical_flows;
-- Create temporary routelink tables (dropped at end)
SELECT *
INTO ingest.nwm_routelink
FROM external.nwm_routelink;
FROM external.nwm_routelink_3_0_conus;

SELECT
main.nwm_feature_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS cache.rfc_categorical_stages;
-- calculation
SELECT *
INTO ingest.routelink
FROM external.nwm_routelink;
FROM external.nwm_routelink_3_0_conus;

SELECT
main.nwm_feature_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ max_flows_station_xwalk AS (
ELSE FALSE
END AS is_waterbody
FROM cache.max_flows_rnr mf
LEFT JOIN external.nwm_routelink rl
LEFT JOIN external.nwm_routelink_3_0_conus rl
ON rl.nwm_feature_id = mf.feature_id
LEFT JOIN rnr.nwm_crosswalk xwalk
ON xwalk.nwm_feature_id = mf.feature_id
Expand Down
Loading

0 comments on commit 08d4a24

Please sign in to comment.