Skip to content

Commit

Permalink
Lambda function working through terraform
Browse files Browse the repository at this point in the history
  • Loading branch information
kennethjmyers committed Aug 9, 2023
1 parent 633c9b7 commit 0f47861
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 106 deletions.
19 changes: 19 additions & 0 deletions .terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 0 additions & 65 deletions configUtils.py

This file was deleted.

12 changes: 5 additions & 7 deletions lambdaFunctions/getRedditDataFunction/lambda_function.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import redditUtils as ru
import configUtils as cu
import viral_reddit_posts_utils.configUtils as cu
import tableDefinition
import praw
import boto3
Expand Down Expand Up @@ -41,9 +41,8 @@ def lambda_handler(event, context):
risingData = ru.deduplicateRedditData(risingData)

# Push to DynamoDB
tableName = view
risingRawTableDefinition = tableDefinition.getTableDefinition(tableName)
risingTable = ru.getOrCreateTable(risingRawTableDefinition, dynamodb_resource)
tableName = f"{view}-{os.environ['ENV']}"
risingTable = ru.getTable(tableName, dynamodb_resource)
ru.batchWriter(risingTable, risingData, schema)

# Get Hot Reddit data
Expand All @@ -55,9 +54,8 @@ def lambda_handler(event, context):
hotData = ru.deduplicateRedditData(hotData)

# Push to DynamoDB
tableName = view
hotTableDefinition = tableDefinition.getTableDefinition(tableName)
hotTable = ru.getOrCreateTable(hotTableDefinition, dynamodb_resource)
tableName = f"{view}-{os.environ['ENV']}"
hotTable = ru.getTable(tableName, dynamodb_resource)
ru.batchWriter(hotTable, hotData, schema)

return 200
18 changes: 2 additions & 16 deletions lambdaFunctions/getRedditDataFunction/redditUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,22 +92,8 @@ def deduplicateRedditData(data):
return newData


def getOrCreateTable(tableDefinition, dynamodb_resource):
existingTables = [a.name for a in dynamodb_resource.tables.all()] # client method: dynamodb_client.list_tables()['TableNames']
tableName = tableDefinition['TableName']
if tableName not in existingTables:
print(f"Table {tableName} not found, creating table")
# create table
# boto3: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/service-resource/create_table.html#DynamoDB.ServiceResource.create_table
# dynamodb keyschemas and secondary indexes: https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.CoreComponents.html
table = dynamodb_resource.create_table(**tableDefinition)

# Wait until the table exists.
table.wait_until_exists()

else:
print(f"Table {tableName} exists, grabbing table...")
table = dynamodb_resource.Table(tableName)
def getTable(tableName, dynamodb_resource):
table = dynamodb_resource.Table(tableName)

# Print out some data about the table.
print(f"Item count in table: {table.item_count}") # this only updates every 6 hours
Expand Down
75 changes: 62 additions & 13 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ variable "info" {
default = {
name = "viralredditposts"
env = "dev"
region = "us-east-2"
pyversion = "3.7"
}
}
Expand All @@ -30,11 +31,21 @@ locals {
}

# zip the lambda function
resource "null_resource" "zip_function" {
provisioner "local-exec" {
command = "./scripts/zipLambdaFunction.sh -f getRedditDataFunction"
on_failure = fail # OR continue
}
# resource "null_resource" "zip_function" {
# # rebuild zip each time, this is low cost and good for forcing it to upload each terraform apply
# triggers = {
# build_number = timestamp()
# }
# provisioner "local-exec" {
# command = "./scripts/zipLambdaFunction.sh -f getRedditDataFunction"
# on_failure = fail # OR continue
# }
# }

# Package the lambda source directory into a zip at plan time; the resulting
# hash feeds the function's source_code_hash so code changes trigger redeploys.
data "archive_file" "lambda_zip" {
  type        = "zip"
  # everything under the function dir is included in the artifact
  source_dir  = "./lambdaFunctions/getRedditDataFunction/"
  output_path = "./scripts/zippedLambdaFunction/getRedditDataFunction.zip"
}

# zip the PRAW and boto3 packages
Expand All @@ -45,7 +56,7 @@ resource "null_resource" "zip_python_packages" {
build_number = timestamp()
}
provisioner "local-exec" {
command = "source venv/bin/activate && ./scripts/zipPythonPackage.sh -v ${var.info.pyversion} praw==7.7.0 boto3==1.26.117"
command = "source venv/bin/activate && ./scripts/zipPythonPackage.sh -v ${var.info.pyversion} praw==7.7.0 boto3==1.26.117 git+https://github.com/ViralRedditPosts/Utils.git@main"
on_failure = fail # OR continue
}
}
Expand Down Expand Up @@ -78,6 +89,20 @@ resource "aws_s3_object" "move_boto3_zip" {
}
}

# add git+https://github.com/ViralRedditPosts/Utils.git@main to S3
# Upload the zipped Utils package to the packages bucket so it can be
# consumed as a Lambda layer.
resource "aws_s3_object" "move_utils_zip" {
  # the zip must be built locally (zipPythonPackage.sh) before uploading
  depends_on = [null_resource.zip_python_packages]

  bucket = "packages-${var.info.name}-${var.info.env}-${local.account_id}"
  # NOTE(review): key/source were mangled by the diff page's email
  # obfuscation ("[email protected]"). Reconstructed from the shell script's
  # naming: package_name=${package##*/} -> "Utils.git@main", zipped as
  # "Utils.git@main.zip". Confirm against the actual repo contents.
  key    = "Utils.git@main.zip"
  source = "./scripts/zippedPythonPackages/Utils.git@main/Utils.git@main.zip"
  tags = {
    Name        = "utils-zip"
    Environment = "${var.info.env}"
    Project     = "viral-reddit-posts"
  }
}

# define policy for attaching role
data "aws_iam_policy_document" "assume_role" {
statement {
Expand All @@ -99,11 +124,15 @@ data "aws_iam_policy_document" "inline_policy" {
effect = "Allow"
actions = [
"s3:GetObject",
"s3:ListBucket"
"s3:ListBucket",
"dynamodb:DescribeTable",
"dynamodb:BatchWriteItem"
]
resources = [
"arn:aws:s3:::data-${var.info.name}-${var.info.env}-${local.account_id}",
"arn:aws:s3:::data-${var.info.name}-${var.info.env}-${local.account_id}/*"
"arn:aws:s3:::data-${var.info.name}-${var.info.env}-${local.account_id}/*",
"arn:aws:dynamodb:${var.info.region}:${local.account_id}:table/hot-${var.info.env}",
"arn:aws:dynamodb:${var.info.region}:${local.account_id}:table/rising-${var.info.env}"
]
}
}
Expand Down Expand Up @@ -146,15 +175,28 @@ resource "aws_lambda_layer_version" "boto3_layer" {
s3_bucket = "packages-${var.info.name}-${var.info.env}-${local.account_id}"
s3_key = "boto3==1.26.117.zip"
layer_name = "boto3-1_26_117"
description = "python binaries for pboto3==1.26.117 library"
description = "python binaries for boto3==1.26.117 library"
compatible_architectures = ["x86_64"]
compatible_runtimes = ["python${var.info.pyversion}"]
}

# Lambda layer exposing the Utils package to the function at runtime.
resource "aws_lambda_layer_version" "utils_layer" {
  # BUG FIX: this layer reads the Utils zip object, so it must wait on the
  # Utils upload, not the boto3 one (copy/paste error in the original).
  depends_on = [aws_s3_object.move_utils_zip]
  # you either have to specify a local filename or the s3 object
  # https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_layer_version
  # filename = "lambda_layer_payload.zip"
  s3_bucket = "packages-${var.info.name}-${var.info.env}-${local.account_id}"
  # NOTE(review): key was mangled by the diff page's email obfuscation;
  # reconstructed as the zip name produced by zipPythonPackage.sh — confirm.
  s3_key                   = "Utils.git@main.zip"
  layer_name               = "utils_layer"
  description              = "python binaries for Utils.git@main library"
  compatible_architectures = ["x86_64"]
  compatible_runtimes      = ["python${var.info.pyversion}"]
}

# make lambda function
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function
resource "aws_lambda_function" "test_lambda" {
depends_on = [resource.null_resource.zip_function]
resource "aws_lambda_function" "lambda_function" {
# depends_on = [resource.null_resource.zip_function]

filename = "./scripts/zippedLambdaFunction/getRedditDataFunction.zip"
function_name = "lambda-reddit-scraping-${var.info.env}"
Expand All @@ -167,11 +209,18 @@ resource "aws_lambda_function" "test_lambda" {
size = 512 # Min 512 MB and the Max 10240 MB
}

layers = [aws_lambda_layer_version.praw_layer.arn, aws_lambda_layer_version.boto3_layer.arn]
layers = [
aws_lambda_layer_version.praw_layer.arn,
aws_lambda_layer_version.boto3_layer.arn,
aws_lambda_layer_version.utils_layer.arn,
]

source_code_hash = data.archive_file.lambda_zip.output_base64sha256

environment {
variables = {
AWS_BUCKET = "data-${var.info.name}-${var.info.env}-${local.account_id}"
AWS_BUCKET = "data-${var.info.name}-${var.info.env}-${local.account_id}",
ENV = "${var.info.env}"
}
}
tags = {
Expand Down
16 changes: 11 additions & 5 deletions scripts/zipPythonPackage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,15 @@ cd $SCRIPT_PATH

for package in "$@"; do
echo "Preparing ${package}..."
mkdir -p ./zippedPythonPackages/${package}/python
# format the zip file. needed for the git packages which have lots of slashes.
if [[ ${package} == "git+"* ]]; then
package_name=${package##*/} # https://stackoverflow.com/questions/3162385/how-to-split-a-string-in-shell-and-get-the-last-field
else
package_name=${package}
fi
mkdir -p ./zippedPythonPackages/${package_name}/python

cd ./zippedPythonPackages/${package}/python
cd ./zippedPythonPackages/${package_name}/python

# install binaries for package
pip install \
Expand All @@ -43,9 +49,9 @@ for package in "$@"; do
rm -rf *dist-info # some cleanup of unnecessary stuff
# zip package
cd ..
rm -rf ${package}.zip # remove first if it exists
echo "Zipping ${package} at $(pwd)"
zip -r ${package}.zip python # zip contents of python to zip name
rm -rf ${package_name}.zip # remove first if it exists
echo "Zipping ${package_name} at $(pwd)"
zip -r ${package_name}.zip python # zip contents of python to zip name
cd ../../ # go back out to scripts dir
done

Expand Down

0 comments on commit 0f47861

Please sign in to comment.