Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(daft): read delta table #24848

Merged
merged 1 commit into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ jobs:
attempt_delay: 2000

build-hm-prefect-calculate:
name: Build hm-prefect-calculate
name: Build prefect-calculate
needs: detect-changes
if: ${{ needs.detect-changes.outputs.hm-prefect-calculate == 'true' }}
runs-on: ubuntu-24.04
Expand Down Expand Up @@ -465,7 +465,7 @@ jobs:
attempt_delay: 2000

build-hm-prefect-daft-analysis:
name: Build hm-prefect-daft-analysis
name: Build prefect-daft-analysis
needs: detect-changes
if: ${{ needs.detect-changes.outputs.hm-prefect-daft-analysis == 'true' }}
runs-on: ubuntu-24.04
Expand Down Expand Up @@ -501,7 +501,7 @@ jobs:
attempt_delay: 2000

build-hm-prefect-greet:
name: Build hm-prefect-greet
name: Build prefect-greet
needs: detect-changes
if: ${{ needs.detect-changes.outputs.hm-prefect-greet == 'true' }}
runs-on: ubuntu-24.04
Expand Down Expand Up @@ -537,7 +537,7 @@ jobs:
attempt_delay: 2000

build-hm-prefect-print-platform:
name: Build hm-prefect-print-platform
name: Build prefect-print-platform
needs: detect-changes
if: ${{ needs.detect-changes.outputs.hm-prefect-print-platform == 'true' }}
runs-on: ubuntu-24.04
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ module "hm_glue_crawler_motor_data" {
source = "../../../../modules/aws/hm_aws_glue_crawler"
aws_glue_crawler_name = "hm-delta-lake-crawler-iot"
aws_glue_crawler_delta_tables = ["s3://hm-production-bucket/delta-tables/motor_data/"]
aws_glue_database = "production_hm_delta_db"
aws_glue_database = "production_motor_db"
schedule = "cron(40 9 * * ? *)" # Every day at 9:40 UTC https://crontab.cronhub.io/
iam_role_arn = "arn:aws:iam::272394222652:role/service-role/AWSGlueServiceRole-hm"
environment = var.environment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,8 @@ module "ray_cluster_iam_role" {
ray_cluster_namespace = "${var.environment}-hm-ray-cluster"
amazon_eks_cluster_oidc_provider = module.amazon_eks_cluster.oidc_provider
amazon_eks_cluster_oidc_provider_arn = module.amazon_eks_cluster.oidc_provider_arn
s3_bucket_name = module.amazon_s3_bucket_hm_mlflow.name
mlflow_s3_bucket_name = module.amazon_s3_bucket_hm_mlflow.name
iot_data_s3_bucket_name = "iot-data-bucket"
environment = var.environment
team = var.team
}
Expand All @@ -628,7 +629,7 @@ module "kubernetes_namespace_hm_ray_cluster" {
]
}
# Ray Cluster Valkey - Kubernetes namespace
module "hm_kubernetes_namespace_hm_ray_cluster_valkey" {
module "kubernetes_namespace_hm_ray_cluster_valkey" {
source = "../../../../modules/kubernetes/hm_kubernetes_namespace"
kubernetes_namespace = "${var.environment}-hm-ray-cluster-valkey"
labels = {
Expand Down Expand Up @@ -680,7 +681,7 @@ module "s3_bucket_hm_mimir_ruler" {
environment = var.environment
team = var.team
}
module "hm_mimir_iam_role" {
module "mimir_iam_role" {
providers = { aws = aws.production }
source = "../../../../modules/kubernetes/hm_mimir_iam_role"
mimir_service_account_name = "hm-mimir"
Expand Down Expand Up @@ -733,7 +734,7 @@ module "s3_bucket_hm_loki_ruler" {
environment = var.environment
team = var.team
}
module "hm_loki_iam_role" {
module "loki_iam_role" {
providers = { aws = aws.production }
source = "../../../../modules/kubernetes/hm_loki_iam_role"
loki_service_account_name = "hm-loki"
Expand Down Expand Up @@ -791,7 +792,7 @@ module "s3_bucket_hm_tempo_trace" {
environment = var.environment
team = var.team
}
module "hm_tempo_iam_role" {
module "tempo_iam_role" {
providers = { aws = aws.production }
source = "../../../../modules/kubernetes/hm_tempo_iam_role"
tempo_service_account_name = "hm-tempo"
Expand Down Expand Up @@ -896,7 +897,7 @@ data "aws_secretsmanager_secret_version" "hm_prefect_postgres_secret_version" {
provider = aws.production
secret_id = data.aws_secretsmanager_secret.hm_prefect_postgres_secret.id
}
module "hm_prefect_postgres_security_group" {
module "prefect_postgres_security_group" {
providers = { aws = aws.production }
source = "../../../../modules/aws/hm_amazon_rds_security_group"
amazon_ec2_security_group_name = "${local.prefect_postgres_name}-security-group"
Expand All @@ -905,23 +906,23 @@ module "hm_prefect_postgres_security_group" {
environment = var.environment
team = var.team
}
module "hm_prefect_postgres_subnet_group" {
module "prefect_postgres_subnet_group" {
providers = { aws = aws.production }
source = "../../../../modules/aws/hm_amazon_rds_subnet_group"
subnet_group_name = "${local.prefect_postgres_name}-subnet-group"
subnet_ids = var.amazon_vpc_private_subnet_ids
environment = var.environment
team = var.team
}
module "hm_prefect_postgres_parameter_group" {
module "prefect_postgres_parameter_group" {
providers = { aws = aws.production }
source = "../../../../modules/aws/hm_amazon_rds_parameter_group"
family = "postgres17"
parameter_group_name = "${local.prefect_postgres_name}-parameter-group"
environment = var.environment
team = var.team
}
module "hm_prefect_postgres_instance" {
module "prefect_postgres_instance" {
providers = { aws = aws.production }
source = "../../../../modules/aws/hm_amazon_rds_instance"
amazon_rds_name = local.prefect_postgres_name
Expand All @@ -940,7 +941,7 @@ module "hm_prefect_postgres_instance" {
team = var.team
}
# Prefect Server - Kubernetes namespace
module "hm_kubernetes_namespace_hm_prefect_server" {
module "kubernetes_namespace_hm_prefect_server" {
source = "../../../../modules/kubernetes/hm_kubernetes_namespace"
kubernetes_namespace = "${var.environment}-hm-prefect-server"
labels = {
Expand All @@ -950,8 +951,24 @@ module "hm_kubernetes_namespace_hm_prefect_server" {
module.hm_amazon_eks_cluster
]
}
# Prefect Worker - IAM role
module "prefect_worker_iam_role" {
providers = { aws = aws.production }
source = "../../../../modules/kubernetes/hm_prefect_worker_iam_role"
prefect_worker_service_account_name = "hm-prefect-worker"
prefect_worker_namespace = "${var.environment}-hm-prefect-worker"
amazon_eks_cluster_oidc_provider = module.hm_amazon_eks_cluster.oidc_provider
amazon_eks_cluster_oidc_provider_arn = module.hm_amazon_eks_cluster.oidc_provider_arn
iot_data_s3_bucket_name = "iot-data-bucket"
aws_glue_database_names = [
"${var.environment}_battery_db",
"${var.environment}_motor_db"
]
environment = var.environment
team = var.team
}
# Prefect Worker - Kubernetes namespace
module "hm_kubernetes_namespace_hm_prefect_worker" {
module "kubernetes_namespace_hm_prefect_worker" {
source = "../../../../modules/kubernetes/hm_kubernetes_namespace"
kubernetes_namespace = "${var.environment}-hm-prefect-worker"
labels = {
Expand Down Expand Up @@ -1090,7 +1107,7 @@ module "kubernetes_namespace_hm_kafbat_ui" {

# LiteLLM
# LiteLLM - IAM role
module "hm_litellm_iam_role" {
module "litellm_iam_role" {
providers = { aws = aws.production }
source = "../../../../modules/kubernetes/hm_litellm_iam_role"
litellm_service_account_name = "hm-litellm-service-account"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ resource "aws_iam_role_policy" "eks_cluster_s3_policy" {
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy
resource "aws_iam_role_policy" "iot_data_s3_policy" {
name = "${local.aws_iam_role_name_prefix}IoTDataS3Policy-${var.amazon_eks_cluster_name}"
name = "${local.aws_iam_role_name_prefix}IotDataS3Policy-${var.amazon_eks_cluster_name}"
role = aws_iam_role.s3_csi_driver_mountpoint_role.name
policy = jsonencode({
Version = "2012-10-17"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
}
}
}

data "aws_caller_identity" "current" {}
data "aws_region" "current" {}

locals {
aws_iam_role_name_prefix = "PrefectWorkerRole"
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role
resource "aws_iam_role" "prefect_worker_iam_role" {
name = "${local.aws_iam_role_name_prefix}-${var.prefect_worker_service_account_name}"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Principal = {
Federated = var.amazon_eks_cluster_oidc_provider_arn
}
Action = "sts:AssumeRoleWithWebIdentity"
Condition = {
StringEquals = {
"${var.amazon_eks_cluster_oidc_provider}:aud" = "sts.amazonaws.com",
"${var.amazon_eks_cluster_oidc_provider}:sub" = "system:serviceaccount:${var.prefect_worker_namespace}:${var.prefect_worker_service_account_name}"
}
}
}
]
})
tags = {
Environment = var.environment
Team = var.team
Name = "${local.aws_iam_role_name_prefix}-${var.prefect_worker_service_account_name}"
}
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy
resource "aws_iam_role_policy" "iot_data_s3_policy" {
name = "${local.aws_iam_role_name_prefix}IotDataS3Policy-${var.prefect_worker_service_account_name}"
role = aws_iam_role.prefect_worker_iam_role.name
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"s3:GetBucketLocation",
"s3:ListBucket"
]
Resource = [
"arn:aws:s3:::${var.iot_data_s3_bucket_name}"
]
},
{
Effect = "Allow"
Action = [
"s3:GetObject",
"s3:PutObject"
]
Resource = [
"arn:aws:s3:::${var.iot_data_s3_bucket_name}/*"
]
}
]
})
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy
resource "aws_iam_role_policy" "aws_glue_policy" {
name = "${local.aws_iam_role_name_prefix}AwsGluePolicy-${var.prefect_worker_service_account_name}"
role = aws_iam_role.prefect_worker_iam_role.name
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"glue:GetDatabase",
"glue:GetDatabases",
"glue:GetTable",
"glue:GetTables"
]
Resource = flatten([
"arn:aws:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:catalog",
[for database in var.aws_glue_database_names :
"arn:aws:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:database/${database}"
],
[for database in var.aws_glue_database_names :
"arn:aws:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:table/${database}/*"
]
])
}
]
})
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
variable "prefect_worker_service_account_name" {
type = string
}
variable "prefect_worker_namespace" {
type = string
}
variable "iot_data_s3_bucket_name" {
type = string
}
variable "aws_glue_database_names" {
type = list(string)
}
variable "amazon_eks_cluster_oidc_provider" {
type = string
}
variable "amazon_eks_cluster_oidc_provider_arn" {
type = string
}
variable "environment" {
type = string
}
variable "team" {
type = string
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ locals {
aws_iam_role_name_prefix = "RayClusterRole"
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role
resource "aws_iam_role" "ray_cluster_role" {
resource "aws_iam_role" "ray_cluster_iam_role" {
name = "${local.aws_iam_role_name_prefix}-${var.ray_cluster_service_account_name}"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Expand All @@ -37,9 +37,9 @@ resource "aws_iam_role" "ray_cluster_role" {
}
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy
resource "aws_iam_role_policy" "ray_cluster_role_policy" {
name = "${local.aws_iam_role_name_prefix}Policy-${var.ray_cluster_service_account_name}"
role = aws_iam_role.ray_cluster_role.name
resource "aws_iam_role_policy" "mlflow_s3_policy" {
name = "${local.aws_iam_role_name_prefix}MLflowS3Policy-${var.ray_cluster_service_account_name}"
role = aws_iam_role.ray_cluster_iam_role.name
policy = jsonencode({
Version = "2012-10-17"
Statement = [
Expand All @@ -49,7 +49,36 @@ resource "aws_iam_role_policy" "ray_cluster_role_policy" {
"s3:PutObject"
]
Resource = [
"arn:aws:s3:::${var.s3_bucket_name}/*"
"arn:aws:s3:::${var.mlflow_s3_bucket_name}/*"
]
}
]
})
}
# https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy
resource "aws_iam_role_policy" "iot_data_s3_policy" {
name = "${local.aws_iam_role_name_prefix}IotDataS3Policy-${var.ray_cluster_service_account_name}"
role = aws_iam_role.ray_cluster_iam_role.name
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"s3:ListBucket"
]
Resource = [
"arn:aws:s3:::${var.iot_data_s3_bucket_name}"
]
},
{
Effect = "Allow"
Action = [
"s3:GetObject",
"s3:PutObject"
]
Resource = [
"arn:aws:s3:::${var.iot_data_s3_bucket_name}/*"
]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ variable "amazon_eks_cluster_oidc_provider" {
variable "amazon_eks_cluster_oidc_provider_arn" {
type = string
}
variable "s3_bucket_name" {
variable "mlflow_s3_bucket_name" {
type = string
}
variable "iot_data_s3_bucket_name" {
type = string
}
variable "environment" {
Expand Down
6 changes: 3 additions & 3 deletions cloud-platform/aws/aws-glue/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ aws-glue-crawler-connection-get:
aws glue get-connection --name=hm-postgres-connection

aws-glue-partition-index-list:
aws glue get-partition-indexes --database-name=production_hm_delta_db --table-name=motor
aws glue get-partition-indexes --database-name=production_motor_db --table-name=motor

aws-glue-partition-index-create:
aws glue create-partition-index --database-name=production_hm_delta_db --table-name=motor --partition-index=Keys=_event_id,IndexName=_event_id_idx
aws glue create-partition-index --database-name=production_motor_db --table-name=motor --partition-index=Keys=_event_id,IndexName=_event_id_idx

aws-glue-partition-index-delete:
aws glue delete-partition-index --database-name=production_hm_delta_db --table-name=motor --index-name=_event_id_idx
aws glue delete-partition-index --database-name=production_motor_db --table-name=motor --index-name=_event_id_idx

create-adsb-db-delta-tables:
bash bin/create_adsb_db_delta_tables.sh
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# https://docs.prefect.io/concepts/deployments/?h=.prefectignore#build-the-deployment
# https://docs.prefect.io/v3/deploy/infrastructure-concepts/store-flow-code#include-or-exclude-files-from-storage

# Anywhere
**/*.aliases
Expand Down
Loading
Loading