Skip to content

Commit

Permalink
Merge branch 'develop' into gtc-2953/ingest_to_gee
Browse files Browse the repository at this point in the history
  • Loading branch information
danscales authored Nov 4, 2024
2 parents 87f5157 + 14f51c5 commit 458125e
Show file tree
Hide file tree
Showing 27 changed files with 246 additions and 35 deletions.
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# MyPy
.mypy_cache/*

# Docker Files
docker-compose.dev.yml
docker-compose.prod.yml
docker-compose.test.yml
Expand Down
29 changes: 29 additions & 0 deletions app/models/orm/migrations/versions/604bf4e66c2b_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Add content_date_description to version_metadata
Revision ID: 604bf4e66c2b
Revises: ef3392e8e054
Create Date: 2024-10-31 16:52:56.571782
"""
from alembic import op
import sqlalchemy as sa
import sqlalchemy_utils
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = '604bf4e66c2b'
down_revision = 'ef3392e8e054'
branch_labels = None
depends_on = None


def upgrade():
    """Add the free-text ``content_date_description`` column to version_metadata."""
    op.add_column(
        "version_metadata",
        sa.Column("content_date_description", sa.String(), nullable=True),
    )


def downgrade():
    """Reverse upgrade(): drop ``content_date_description`` from version_metadata."""
    op.drop_column("version_metadata", "content_date_description")
35 changes: 35 additions & 0 deletions app/models/orm/migrations/versions/ef3392e8e054_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""update resolution metadata fields
Revision ID: ef3392e8e054
Revises: d767b6dd2c4c
Create Date: 2024-09-10 14:19:43.424752
"""
from alembic import op
import sqlalchemy as sa
import sqlalchemy_utils
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'ef3392e8e054'
down_revision = 'd767b6dd2c4c'
branch_labels = None
depends_on = None


def upgrade():
    """Rename ``resolution`` to ``spatial_resolution`` and add a nullable
    free-text ``resolution_description`` column on both metadata tables."""
    for table in ("dataset_metadata", "version_metadata"):
        op.alter_column(
            table, "resolution", nullable=True, new_column_name="spatial_resolution"
        )
        op.add_column(
            table, sa.Column("resolution_description", sa.String(), nullable=True)
        )


def downgrade():
    """Reverse upgrade(): drop ``resolution_description`` and rename
    ``spatial_resolution`` back to ``resolution`` on both metadata tables.

    The four DDL statements touch independent table/column pairs, so the
    per-table grouping here is equivalent to the original interleaved order.
    """
    for table in ("version_metadata", "dataset_metadata"):
        op.drop_column(table, "resolution_description")
        op.alter_column(
            table, "spatial_resolution", nullable=True, new_column_name="resolution"
        )
3 changes: 2 additions & 1 deletion app/models/orm/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

class MetadataMixin:
title = db.Column(db.String)
resolution = db.Column(db.Numeric)
spatial_resolution = db.Column(db.Numeric)
resolution_description = db.Column(db.String)
geographic_coverage = db.Column(db.String)
update_frequency = db.Column(db.String)
citation = db.Column(db.String)
Expand Down
1 change: 1 addition & 0 deletions app/models/orm/version_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class VersionMetadata(Base, MetadataMixin):
version = db.Column(db.String, nullable=False)
content_date = db.Column(db.Date)
content_start_date = db.Column(db.Date)
content_date_description = db.Column(db.String)
content_end_date = db.Column(db.Date)
last_update = db.Column(db.Date)
description = db.Column(db.String)
Expand Down
5 changes: 3 additions & 2 deletions app/models/pydantic/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from ...settings.globals import (
AURORA_JOB_QUEUE,
ON_DEMAND_COMPUTE_JOB_QUEUE,
DATA_LAKE_JOB_QUEUE,
DEFAULT_JOB_DURATION,
GDAL_PYTHON_JOB_DEFINITION,
Expand Down Expand Up @@ -138,9 +139,9 @@ class PixETLJob(Job):


class GDALCOGJob(Job):
"""Use for creating COG files using GDAL Python docker in PixETL queue."""
"""Use for creating COG files using GDAL Python docker in on-demand compute queue."""

job_queue = PIXETL_JOB_QUEUE
job_queue = ON_DEMAND_COMPUTE_JOB_QUEUE
job_definition = GDAL_PYTHON_JOB_DEFINITION
vcpus = 8
memory = 64000
Expand Down
16 changes: 13 additions & 3 deletions app/models/pydantic/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@


class CommonMetadata(BaseModel):
resolution: Optional[Union[int, float]]
spatial_resolution: Optional[Union[int, float]]
resolution_description: Optional[str]
geographic_coverage: Optional[str]
update_frequency: Optional[str]
scale: Optional[str]
Expand All @@ -21,7 +22,8 @@ class Config:
schema_extra = {
"examples": [
{
"resolution": 10,
"spatial_resolution": 10,
"resolution_description": "10 meters",
"geographic_coverage": "Amazon Basin",
"update_frequency": "Updated daily, image revisit time every 5 days",
"scale": "regional",
Expand Down Expand Up @@ -110,7 +112,10 @@ class VersionMetadata(CommonMetadata):
None,
description="Date range covered by the content",
)

content_date_description: Optional[str] = Field(
None,
description="Date of content to display",
)
last_update: Optional[date] = Field(
None,
description="Date the data were last updated",
Expand All @@ -128,6 +133,7 @@ class Config:
"start_date": "2000-01-01", # TODO fix date
"end_date": "2021-04-06",
},
"content_date_description": "2000 - present",
}
]
}
Expand Down Expand Up @@ -157,6 +163,10 @@ class VersionMetadataUpdate(VersionMetadataIn):
None,
description="Date range covered by the content",
)
content_date_description: Optional[str] = Field(
None,
description="Date of content to display",
)

last_update: Optional[date] = Field(
None,
Expand Down
3 changes: 2 additions & 1 deletion app/routes/datasets/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ async def get_version_assets(
description="The number of assets per page. Default is `10`.",
),
) -> Union[PaginatedAssetsResponse, AssetsResponse]:
"""Get all assets for a given dataset version.
"""Get all assets for a given dataset version. The list of assets
is sorted by the creation time of each asset.
Will attempt to paginate if `page[size]` or `page[number]` is
provided. Otherwise, it will attempt to return the entire list of
Expand Down
4 changes: 3 additions & 1 deletion app/routes/datasets/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@
async def get_version(
*, dv: Tuple[str, str] = Depends(dataset_version_dependency)
) -> VersionResponse:
"""Get basic metadata for a given version."""
"""Get basic metadata for a given version. The list of assets is sorted by
the creation time of each asset."""

dataset, version = dv
row: ORMVersion = await versions.get_version(dataset, version)
Expand Down Expand Up @@ -536,6 +537,7 @@ async def _version_response(
.where(ORMAsset.dataset == dataset)
.where(ORMAsset.version == version)
.where(ORMAsset.status == AssetStatus.saved)
.order_by(ORMAsset.created_on)
.gino.all()
)
data = Version.from_orm(data).dict(by_alias=True)
Expand Down
1 change: 1 addition & 0 deletions app/settings/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
MAX_MEM = config("MAX_MEM", cast=int, default=760000)
PIXETL_JOB_DEFINITION = config("PIXETL_JOB_DEFINITION", cast=str)
PIXETL_JOB_QUEUE = config("PIXETL_JOB_QUEUE", cast=str)
ON_DEMAND_COMPUTE_JOB_QUEUE = config("ON_DEMAND_COMPUTE_JOB_QUEUE", cast=str)
PIXETL_CORES = config("PIXETL_CORES", cast=int, default=48)
PIXETL_MAX_MEM = config("PIXETL_MAX_MEM", cast=int, default=380000)
PIXETL_DEFAULT_RESAMPLING = config(
Expand Down
1 change: 1 addition & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ services:
- TILE_CACHE_CLUSTER=tile_cache_cluster
- TILE_CACHE_SERVICE=tile_cache_service
- PIXETL_JOB_QUEUE=pixetl_jq
- ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq
- API_URL=http://app_dev:80
- RASTER_ANALYSIS_LAMBDA_NAME=raster-analysis-tiled_raster_analysis-default
- RW_API_URL=https://staging-api.resourcewatch.org
Expand Down
1 change: 1 addition & 0 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ services:
- DATA_LAKE_JOB_QUEUE=data_lake_jq
- TILE_CACHE_JOB_QUEUE=tile_cache_jq
- PIXETL_JOB_QUEUE=pixetl_jq
- ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq
- RASTER_ANALYSIS_LAMBDA_NAME=raster_analysis
- API_URL="http://app_dev:80"
- RW_API_URL=https://api.resourcewatch.org
Expand Down
1 change: 1 addition & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ services:
- TILE_CACHE_CLUSTER=tile_cache_cluster
- TILE_CACHE_SERVICE=tile_cache_service
- PIXETL_JOB_QUEUE=pixetl_jq
- ON_DEMAND_COMPUTE_JOB_QUEUE=cogify_jq
- PIXETL_CORES=1
- MAX_CORES=1
- NUM_PROCESSES=1
Expand Down
29 changes: 19 additions & 10 deletions terraform/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ data "template_file" "container_definition" {
tile_cache_job_queue = module.batch_job_queues.tile_cache_job_queue_arn
pixetl_job_definition = module.batch_job_queues.pixetl_job_definition_arn
pixetl_job_queue = module.batch_job_queues.pixetl_job_queue_arn
on_demand_compute_job_queue = module.batch_job_queues.on_demand_compute_job_queue_arn
raster_analysis_lambda_name = "raster-analysis-tiled_raster_analysis-default"
raster_analysis_sfn_arn = data.terraform_remote_state.raster_analysis_lambda.outputs.raster_analysis_state_machine_arn
service_url = local.service_url
Expand Down Expand Up @@ -95,15 +96,16 @@ data "template_file" "container_definition" {
data "template_file" "task_batch_policy" {
template = file("${path.root}/templates/run_batch_policy.json.tmpl")
vars = {
aurora_job_definition_arn = module.batch_job_queues.aurora_job_definition_arn
aurora_job_queue_arn = module.batch_job_queues.aurora_job_queue_arn
aurora_job_queue_fast_arn = module.batch_job_queues.aurora_job_queue_fast_arn
data_lake_job_definition_arn = module.batch_job_queues.data_lake_job_definition_arn
data_lake_job_queue_arn = module.batch_job_queues.data_lake_job_queue_arn
tile_cache_job_definition_arn = module.batch_job_queues.tile_cache_job_definition_arn
tile_cache_job_queue_arn = module.batch_job_queues.tile_cache_job_queue_arn
pixetl_job_definition_arn = module.batch_job_queues.pixetl_job_definition_arn
pixetl_job_queue_arn = module.batch_job_queues.pixetl_job_queue_arn
aurora_job_definition_arn = module.batch_job_queues.aurora_job_definition_arn
aurora_job_queue_arn = module.batch_job_queues.aurora_job_queue_arn
aurora_job_queue_fast_arn = module.batch_job_queues.aurora_job_queue_fast_arn
data_lake_job_definition_arn = module.batch_job_queues.data_lake_job_definition_arn
data_lake_job_queue_arn = module.batch_job_queues.data_lake_job_queue_arn
tile_cache_job_definition_arn = module.batch_job_queues.tile_cache_job_definition_arn
tile_cache_job_queue_arn = module.batch_job_queues.tile_cache_job_queue_arn
pixetl_job_definition_arn = module.batch_job_queues.pixetl_job_definition_arn
pixetl_job_queue_arn = module.batch_job_queues.pixetl_job_queue_arn
on_demand_compute_job_queue_arn = module.batch_job_queues.on_demand_compute_job_queue_arn
}
depends_on = [
module.batch_job_queues.aurora_job_definition,
Expand Down Expand Up @@ -190,4 +192,11 @@ data "template_file" "step_function_policy" {
vars = {
raster_analysis_state_machine_arn = data.terraform_remote_state.raster_analysis_lambda.outputs.raster_analysis_state_machine_arn
}
}
}

# Hash of the contents of the FastAPI app docker. The docker commands run in the main
# directory (parent directory of terraform directory), and the Docker file is in the
# same directory.
data "external" "hash" {
  # Runs hash.sh from the repo root (the parent of the terraform/ directory)
  # to compute a content hash of the FastAPI app's Docker context. The
  # resulting "hash" value is used as the container tag (see main.tf), so a
  # new image tag appears only when the Docker contents actually change.
  program = ["${path.root}/scripts/hash.sh", "${path.root}/../", "."]
}
43 changes: 36 additions & 7 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ locals {
aurora_instance_class = data.terraform_remote_state.core.outputs.aurora_cluster_instance_class
aurora_max_vcpus = local.aurora_instance_class == "db.t3.medium" ? 2 : local.aurora_instance_class == "db.r6g.large" ? 2 : local.aurora_instance_class == "db.r6g.xlarge" ? 4 : local.aurora_instance_class == "db.r6g.2xlarge" ? 8 : local.aurora_instance_class == "db.r6g.4xlarge" ? 16 : local.aurora_instance_class == "db.r6g.8xlarge" ? 32 : local.aurora_instance_class == "db.r6g.16xlarge" ? 64 : local.aurora_instance_class == "db.r5.large" ? 2 : local.aurora_instance_class == "db.r5.xlarge" ? 4 : local.aurora_instance_class == "db.r5.2xlarge" ? 8 : local.aurora_instance_class == "db.r5.4xlarge" ? 16 : local.aurora_instance_class == "db.r5.8xlarge" ? 32 : local.aurora_instance_class == "db.r5.12xlarge" ? 48 : local.aurora_instance_class == "db.r5.16xlarge" ? 64 : local.aurora_instance_class == "db.r5.24xlarge" ? 96 : ""
service_url = var.environment == "dev" ? "http://${module.fargate_autoscaling.lb_dns_name}" : var.service_url
container_tag = substr(var.git_sha, 0, 7)
# The container_registry module only pushes a new Docker image if the docker hash
# computed by its hash.sh script has changed. So, we make the container tag exactly
# be that hash. Therefore, we will know that either the previous docker with the
# same contents and tag will already exist, if nothing has changed in the docker
# image, or the container registry module will push a new docker with the tag we
# want.
container_tag = lookup(data.external.hash.result, "hash")
lb_dns_name = coalesce(module.fargate_autoscaling.lb_dns_name, var.lb_dns_name)
}

Expand Down Expand Up @@ -174,21 +180,44 @@ module "batch_data_lake_writer" {
tags = local.batch_tags
use_ephemeral_storage = true
# SPOT is actually the default, this is just a placeholder until GTC-1791 is done
launch_type = "SPOT"
instance_types = [
"r6id.large", "r6id.xlarge", "r6id.2xlarge", "r6id.4xlarge", "r6id.8xlarge", "r6id.12xlarge", "r6id.16xlarge", "r6id.24xlarge",
"r5ad.large", "r5ad.xlarge", "r5ad.2xlarge", "r5ad.4xlarge", "r5ad.8xlarge", "r5ad.12xlarge", "r5ad.16xlarge", "r5ad.24xlarge",
"r5d.large", "r5d.xlarge", "r5d.2xlarge", "r5d.4xlarge", "r5d.8xlarge", "r5d.12xlarge", "r5d.16xlarge", "r5d.24xlarge"
]
launch_type = "SPOT"
instance_types = var.data_lake_writer_instance_types
compute_environment_name = "data_lake_writer"
}

# Dedicated EC2 (non-SPOT) Batch compute environment backing the on-demand
# job queue (see modules/batch), used for jobs such as COG creation.
# Reuses the data-lake writer instance types and max-vCPU setting.
module "batch_cogify" {
  source = "git::https://github.com/wri/gfw-terraform-modules.git//terraform/modules/compute_environment?ref=v0.4.2.3"
  # IAM policies attached to the ECS instance role for these batch hosts.
  ecs_role_policy_arns = [
    aws_iam_policy.query_batch_jobs.arn,
    aws_iam_policy.s3_read_only.arn,
    data.terraform_remote_state.core.outputs.iam_policy_s3_write_data-lake_arn,
    data.terraform_remote_state.core.outputs.secrets_postgresql-reader_policy_arn,
    data.terraform_remote_state.core.outputs.secrets_postgresql-writer_policy_arn,
    data.terraform_remote_state.core.outputs.secrets_read-gfw-gee-export_policy_arn
  ]
  key_pair  = var.key_pair
  max_vcpus = var.data_lake_max_vcpus
  project   = local.project
  security_group_ids = [
    data.terraform_remote_state.core.outputs.default_security_group_id,
    data.terraform_remote_state.core.outputs.postgresql_security_group_id
  ]
  subnets               = data.terraform_remote_state.core.outputs.private_subnet_ids
  suffix                = local.name_suffix
  tags                  = local.batch_tags
  use_ephemeral_storage = true
  # EC2 (on-demand) rather than SPOT: these jobs should not be interrupted.
  launch_type              = "EC2"
  instance_types           = var.data_lake_writer_instance_types
  compute_environment_name = "batch_cogify"
}

module "batch_job_queues" {
source = "./modules/batch"
aurora_compute_environment_arn = module.batch_aurora_writer.arn
data_lake_compute_environment_arn = module.batch_data_lake_writer.arn
pixetl_compute_environment_arn = module.batch_data_lake_writer.arn
tile_cache_compute_environment_arn = module.batch_data_lake_writer.arn
cogify_compute_environment_arn = module.batch_cogify.arn
environment = var.environment
name_suffix = local.name_suffix
project = local.project
Expand Down
9 changes: 8 additions & 1 deletion terraform/modules/batch/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ resource "aws_batch_job_queue" "pixetl" {
depends_on = [var.pixetl_compute_environment_arn]
}

# Job queue for on-demand compute work, backed by the "cogify" compute
# environment supplied by the caller. Name is truncated to AWS Batch's
# 64-character limit.
resource "aws_batch_job_queue" "on_demand" {
  name                 = substr("${var.project}-on-demand-job-queue${var.name_suffix}", 0, 64)
  state                = "ENABLED"
  priority             = 1
  compute_environments = [var.cogify_compute_environment_arn]
  depends_on           = [var.cogify_compute_environment_arn]
}

resource "aws_batch_job_definition" "tile_cache" {
name = substr("${var.project}-tile_cache${var.name_suffix}", 0, 64)
Expand Down Expand Up @@ -190,4 +197,4 @@ data "template_file" "ecs-task_assume" {
vars = {
service = "ecs-tasks"
}
}
}
6 changes: 5 additions & 1 deletion terraform/modules/batch/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ output "pixetl_job_queue_arn" {
value = aws_batch_job_queue.pixetl.arn
}

# ARN of the on-demand Batch job queue; consumed by the root module (e.g. as
# the ON_DEMAND_COMPUTE_JOB_QUEUE environment variable and batch policy vars).
output "on_demand_compute_job_queue_arn" {
  value = aws_batch_job_queue.on_demand.arn
}

output "tile_cache_job_definition_arn" {
value = aws_batch_job_definition.tile_cache.arn
}
Expand All @@ -48,4 +52,4 @@ output "tile_cache_job_definition" {

output "tile_cache_job_queue_arn" {
value = aws_batch_job_queue.tile_cache.arn
}
}
1 change: 1 addition & 0 deletions terraform/modules/batch/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ variable "project" { type = string }
variable "name_suffix" { type = string }
variable "aurora_compute_environment_arn" { type = string }
variable "data_lake_compute_environment_arn" { type = string }
variable "cogify_compute_environment_arn" { type = string }
variable "tile_cache_compute_environment_arn" { type = string }
variable "pixetl_compute_environment_arn" { type = string }
variable "gdal_repository_url" { type = string }
Expand Down
Loading

0 comments on commit 458125e

Please sign in to comment.