Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(prefect): use ray to compute #24838

Merged
merged 1 commit into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ jobs:
context: ./data-orchestration/hm-prefect/workflows/calculate
file: ./data-orchestration/hm-prefect/workflows/calculate/Dockerfile
push: true
tags: ghcr.io/hongbo-miao/hm-prefect-calculate:latest
tags: ghcr.io/hongbo-miao/prefect-calculate:latest
cache-from: type=gha
cache-to: type=gha,mode=max
attempt_limit: 3
Expand Down Expand Up @@ -494,7 +494,7 @@ jobs:
context: ./data-orchestration/hm-prefect/workflows/daft-analysis
file: ./data-orchestration/hm-prefect/workflows/daft-analysis/Dockerfile
push: true
tags: ghcr.io/hongbo-miao/hm-prefect-daft-analysis:latest
tags: ghcr.io/hongbo-miao/prefect-daft-analysis:latest
cache-from: type=gha
cache-to: type=gha,mode=max
attempt_limit: 3
Expand Down Expand Up @@ -530,7 +530,7 @@ jobs:
context: ./data-orchestration/hm-prefect/workflows/greet
file: ./data-orchestration/hm-prefect/workflows/greet/Dockerfile
push: true
tags: ghcr.io/hongbo-miao/hm-prefect-greet:latest
tags: ghcr.io/hongbo-miao/prefect-greet:latest
cache-from: type=gha
cache-to: type=gha,mode=max
attempt_limit: 3
Expand Down Expand Up @@ -566,7 +566,7 @@ jobs:
context: ./data-orchestration/hm-prefect/workflows/print-platform
file: ./data-orchestration/hm-prefect/workflows/print-platform/Dockerfile
push: true
tags: ghcr.io/hongbo-miao/hm-prefect-print-platform:latest
tags: ghcr.io/hongbo-miao/prefect-print-platform:latest
cache-from: type=gha
cache-to: type=gha,mode=max
attempt_limit: 3
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/docker-prune.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ jobs:
hm-kafka-opa-kafka-connect,
hm-keydb,
hm-opal-client,
hm-prefect-calculate,
hm-prefect-daft-analysis,
hm-prefect-greet,
hm-prefect-print-platform,
hm-rasa-action-server,
hm-spark-find-retired-people-python,
hm-spark-find-retired-people-scala,
hm-spark-ingest-from-s3-to-kafka,
hm-traefik,
hm-web,
prefect-calculate,
prefect-daft-analysis,
prefect-greet,
prefect-print-platform,
]
steps:
- name: Prune dangling Docker images
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,52 @@ module "harbor_config_system" {
project_creation_restriction = "adminonly"
}

# Config
data "aws_secretsmanager_secret" "hm_harbor_google_client_secret" {
name = "${var.environment}-hm-harbor-google-client"
}
data "aws_secretsmanager_secret_version" "hm_harbor_google_client_secret_version" {
secret_id = data.aws_secretsmanager_secret.hm_harbor_google_client_secret.id
}
module "harbor_config_google_auth" {
source = "../../../modules/harbor/hm_harbor_config_google_auth"
primary_auth_mode = true
oidc_client_id = "xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.apps.googleusercontent.com"
oidc_client_secret = jsondecode(data.aws_secretsmanager_secret_version.hm_harbor_google_client_secret_version.secret_string)["secret"]
}

# Registries
module "harbor_registry_docker_hub" {
source = "../../../modules/harbor/hm_harbor_registry"
provider_name = "docker-hub"
name = "docker-hub"
endpoint_url = "https://hub.docker.com"
}

# Projects
module "harbor_project_docker_hub_proxy_cache" {
source = "../../../modules/harbor/hm_harbor_project"
name = "docker-hub-proxy-cache"
public = true
registry_id = module.harbor_registry_docker_hub.id
}
module "harbor_project_engineering" {
module "harbor_project_hm" {
source = "../../../modules/harbor/hm_harbor_project"
name = "engineering"
name = "hm"
public = false
}

data "aws_secretsmanager_secret" "hm_harbor_google_client_secret" {
name = "${var.environment}-hm-harbor-google-client"
# Robots
data "aws_secretsmanager_secret" "hm_harbor_hm_kubernetes_robot_secret" {
provider = aws.production
name = "${var.environment}-hm-harbor-hm-kubernetes-robot"
}
data "aws_secretsmanager_secret_version" "hm_harbor_google_client_secret_version" {
secret_id = data.aws_secretsmanager_secret.hm_harbor_google_client_secret.id
data "aws_secretsmanager_secret_version" "hm_harbor_hm_kubernetes_robot_secret_version" {
provider = aws.production
secret_id = data.aws_secretsmanager_secret.hm_harbor_hm_kubernetes_robot_secret.id
}
module "harbor_config_google_auth" {
source = "../../../modules/harbor/hm_harbor_config_google_auth"
primary_auth_mode = true
oidc_client_id = "xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.apps.googleusercontent.com"
oidc_client_secret = jsondecode(data.aws_secretsmanager_secret_version.hm_harbor_google_client_secret_version.secret_string)["secret"]
module "harbor_robot_account_hm_kubernetes_robot" {
source = "../../../modules/harbor/hm_harbor_robot_account"
name = "hm-kubernetes-robot"
secret = jsondecode(data.aws_secretsmanager_secret_version.hm_harbor_hm_kubernetes_robot_secret_version.secret_string)["secret"]
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Harbor provider configuration: points the provider at the Harbor instance
# served from the URL below. No credentials are set in this block.
# NOTE(review): credentials are presumably supplied outside this block (e.g. environment variables) - confirm.
provider "harbor" {
url = "https://harbor.hongbomiao.com"
}

# Aliased AWS provider configuration. Because it declares an alias, it is not
# the default AWS provider: resources and data sources must opt in explicitly
# with `provider = aws.production`. All calls made through it target us-west-2.
provider "aws" {
alias = "production"
region = "us-west-2"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Declares the providers this module depends on. Only the Harbor provider is
# required, resolved from the `goharbor/harbor` registry source.
# No version constraint is set here, so provider version selection is left to
# the calling configuration / dependency lock file.
terraform {
required_providers {
harbor = {
source = "goharbor/harbor"
}
}
}

# Creates a system-level Harbor robot account whose name and secret are
# supplied by the caller through `var.name` and `var.secret`.
# The single permissions block grants only the `pull` action on `repository`
# resources, scoped with kind "project" and namespace "*".
# NOTE(review): namespace "*" presumably means "every project" - confirm against the provider docs below.
# https://registry.terraform.io/providers/goharbor/harbor/latest/docs/resources/robot_account
resource "harbor_robot_account" "main" {
name = var.name
secret = var.secret
level = "system"
permissions {
access {
action = "pull"
resource = "repository"
}
kind = "project"
namespace = "*"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Name given to the robot account (passed to `harbor_robot_account.main.name`).
# Required: no default is declared.
variable "name" {
type = string
}
# Secret assigned to the robot account (passed to `harbor_robot_account.main.secret`).
# Required: no default is declared. Marked sensitive so Terraform redacts the
# value in plan/apply output.
variable "secret" {
type = string
sensitive = true
}
12 changes: 6 additions & 6 deletions data-orchestration/hm-prefect/workflows/calculate/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ test-coverage:
uv run poe test-coverage

publish-development-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-calculate:development . && \
docker push ghcr.io/hongbo-miao/hm-prefect-calculate:development
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-calculate:development . && \
docker push harbor.hongbomiao.com/hm/prefect-calculate:development

publish-production-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-calculate:latest . && \
docker push ghcr.io/hongbo-miao/hm-prefect-calculate:latest
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-calculate:latest . && \
docker push harbor.hongbomiao.com/hm/prefect-calculate:latest

deploy-development:
uv run poe deploy --environment=development
Expand All @@ -31,7 +31,7 @@ deploy-production:
uv run poe deploy --environment=production

run-development:
uv run poe run -- calculate/hm-development-calculate
uv run poe run -- hm-calculate/development-hm-calculate

run-production:
uv run poe run -- calculate/hm-production-calculate
uv run poe run -- hm-calculate/production-hm-calculate
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async def create_deployment(
docker_image_name: str,
deployment: dict[str, Any],
) -> None:
deployment_name = f"hm-{environment}-{config.BASE_WORKFLOW_NAME}"
deployment_name = f"{environment}-hm-{config.BASE_WORKFLOW_NAME}"
await workflow.deploy(
name=deployment_name,
work_pool_name=deployment["work_pool_name"],
Expand Down
12 changes: 6 additions & 6 deletions data-orchestration/hm-prefect/workflows/daft-analysis/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ test-coverage:
uv run poe test-coverage

publish-development-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-daft-analysis:development . && \
docker push ghcr.io/hongbo-miao/hm-prefect-daft-analysis:development
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-daft-analysis:development . && \
docker push harbor.hongbomiao.com/hm/prefect-daft-analysis:development

publish-production-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-daft-analysis:latest . && \
docker push ghcr.io/hongbo-miao/hm-prefect-daft-analysis:latest
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-daft-analysis:latest . && \
docker push harbor.hongbomiao.com/hm/prefect-daft-analysis:latest

deploy-development:
uv run poe deploy --environment=development
Expand All @@ -31,7 +31,7 @@ deploy-production:
uv run poe deploy --environment=production

run-development:
uv run poe run -- daft-analysis/hm-development-daft-analysis
uv run poe run -- hm-daft-analysis/development-hm-daft-analysis

run-production:
uv run poe run -- daft-analysis/hm-production-daft-analysis
uv run poe run -- hm-daft-analysis/production-hm-daft-analysis
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
import daft
import ray
from prefect import flow, get_run_logger


@flow
def daft_analysis() -> None:
def hm_daft_analysis() -> None:
logger = get_run_logger()

ray_address = "ray://hm-ray-cluster-head-svc.production-hm-ray-cluster:10001"
ray.init(
ray_address,
runtime_env={
"pip": [
"getdaft==0.4.6",
],
},
)
daft.context.set_runner_ray(ray_address)

df = daft.from_pydict(
{
"a": [1, 2, 3, 4, 5],
"b": [True, True, False, False, False],
"a": [3, 2, 5, 6, 1, 4],
"b": [True, False, False, True, True, False],
},
)
df = df.where(df["b"]).sort(df["a"])
df = df.collect()
logger.info(f"{df = }")


if __name__ == "__main__":
daft_analysis()
hm_daft_analysis()
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async def create_deployment(
docker_image_name: str,
deployment: dict[str, Any],
) -> None:
deployment_name = f"hm-{environment}-{config.BASE_WORKFLOW_NAME}"
deployment_name = f"{environment}-hm-{config.BASE_WORKFLOW_NAME}"
await workflow.deploy(
name=deployment_name,
work_pool_name=deployment["work_pool_name"],
Expand Down
12 changes: 6 additions & 6 deletions data-orchestration/hm-prefect/workflows/greet/justfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ test-coverage:
uv run poe test-coverage

publish-development-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-greet:development . && \
docker push ghcr.io/hongbo-miao/hm-prefect-greet:development
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-greet:development . && \
docker push harbor.hongbomiao.com/hm/prefect-greet:development

publish-production-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-greet:latest . && \
docker push ghcr.io/hongbo-miao/hm-prefect-greet:latest
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-greet:latest . && \
docker push harbor.hongbomiao.com/hm/prefect-greet:latest

deploy-development:
uv run poe deploy --environment=development
Expand All @@ -31,7 +31,7 @@ deploy-production:
uv run poe deploy --environment=production

run-development:
uv run poe run -- greet/hm-development-greet
uv run poe run -- hm-greet/development-hm-greet

run-production:
uv run poe run -- greet/hm-production-greet
uv run poe run -- hm-greet/production-hm-greet
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async def create_deployment(
docker_image_name: str,
deployment: dict[str, Any],
) -> None:
deployment_name = f"hm-{environment}-{config.BASE_WORKFLOW_NAME}"
deployment_name = f"{environment}-hm-{config.BASE_WORKFLOW_NAME}"
await workflow.deploy(
name=deployment_name,
work_pool_name=deployment["work_pool_name"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ test-coverage:
uv run poe test-coverage

publish-development-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-print-platform:development . && \
docker push ghcr.io/hongbo-miao/hm-prefect-print-platform:development
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-print-platform:development . && \
docker push harbor.hongbomiao.com/hm/prefect-print-platform:development

publish-production-docker-image:
docker build --platform=linux/amd64 --file=Dockerfile --tag=ghcr.io/hongbo-miao/hm-prefect-print-platform:latest . && \
docker push ghcr.io/hongbo-miao/hm-prefect-print-platform:latest
docker build --platform=linux/amd64 --file=Dockerfile --tag=harbor.hongbomiao.com/hm/prefect-print-platform:latest . && \
docker push harbor.hongbomiao.com/hm/prefect-print-platform:latest

deploy-development:
uv run poe deploy --environment=development
Expand All @@ -31,7 +31,7 @@ deploy-production:
uv run poe deploy --environment=production

run-development:
uv run poe run -- print-platform/hm-development-print-platform
uv run poe run -- hm-print-platform/development-hm-print-platform

run-production:
uv run poe run -- print-platform/hm-production-print-platform
uv run poe run -- hm-print-platform/production-hm-print-platform
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async def create_deployment(
docker_image_name: str,
deployment: dict[str, Any],
) -> None:
deployment_name = f"hm-{environment}-{config.BASE_WORKFLOW_NAME}"
deployment_name = f"{environment}-hm-{config.BASE_WORKFLOW_NAME}"
await workflow.deploy(
name=deployment_name,
work_pool_name=deployment["work_pool_name"],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
sealed-secrets-seal:
cat secrets/hm-airbyte-secret.unsealed.yaml | \
kubeseal \
--controller-namespace=production-hm-sealed-secrets \
--controller-name=hm-sealed-secrets \
--format=yaml \
> kubernetes-manifests/hm-airbyte-secret.yaml
--controller-namespace=production-hm-sealed-secrets \
--controller-name=hm-sealed-secrets \
--format=yaml \
> kubernetes-manifests/hm-airbyte-secret.yaml

argo-cd-app-create:
argocd app create production-hm-airbyte --file=argo-cd-manifests/hm-airbyte-application.yaml
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
sealed-secrets-seal:
cat secrets/hm-clickhouse-secret.unsealed.yaml | \
kubeseal \
--controller-namespace=production-hm-sealed-secrets \
--controller-name=hm-sealed-secrets \
--format=yaml \
> kubernetes-manifests/hm-clickhouse-secret.yaml
--controller-namespace=production-hm-sealed-secrets \
--controller-name=hm-sealed-secrets \
--format=yaml \
> kubernetes-manifests/hm-clickhouse-secret.yaml

argo-cd-app-create:
argocd app create production-hm-clickhouse --file=argo-cd-manifests/hm-clickhouse-application.yaml
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
sealed-secrets-seal:
cat secrets/hm-grafana-secret.unsealed.yaml | \
kubeseal \
--controller-namespace=production-hm-sealed-secrets \
--controller-name=hm-sealed-secrets \
--format=yaml \
> kubernetes-manifests/hm-grafana-secret.yaml
--controller-namespace=production-hm-sealed-secrets \
--controller-name=hm-sealed-secrets \
--format=yaml \
> kubernetes-manifests/hm-grafana-secret.yaml

argo-cd-app-create:
argocd app create production-hm-grafana --file=argo-cd-manifests/hm-grafana-application.yaml
Expand Down
Loading
Loading