Skip to content
This repository was archived by the owner on Feb 6, 2023. It is now read-only.

Commit

Permalink
Merge pull request #48 from niall-turbitt/dev/conf_refactor
Browse files Browse the repository at this point in the history
Dev/conf refactor
  • Loading branch information
niall-turbitt authored Jul 25, 2022
2 parents 3631af4 + 5e2cbbd commit 313e863
Show file tree
Hide file tree
Showing 30 changed files with 803 additions and 180 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/onpullrequest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ jobs:
- name: Deploy integration test [staging environment]
run: |
dbx deploy --jobs=telco-churn-sample-integration-test --environment=staging --files-only
dbx deploy --jobs=STAGING-telco-churn-sample-integration-test --environment=staging --files-only
- name: Run integration test [staging environment]
run: |
dbx launch --job=telco-churn-sample-integration-test --environment=staging --as-run-submit --trace
dbx launch --job=STAGING-telco-churn-sample-integration-test --environment=staging --as-run-submit --trace
12 changes: 6 additions & 6 deletions .github/workflows/onrelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ jobs:
run: |
pip install -r unit-requirements.txt
- name: Deploy telco-churn-model-train job [prod environment]
- name: Deploy PROD-telco-churn-model-train job [prod environment]
run: |
dbx deploy --deployment-file conf/deployment.yml --jobs=telco-churn-model-train --environment=prod
dbx deploy --deployment-file conf/deployment.yml --jobs=PROD-telco-churn-model-train --environment=prod
- name: Deploy telco-churn-model-deployment job [prod environment]
- name: Deploy PROD-telco-churn-model-deployment job [prod environment]
run: |
dbx deploy --deployment-file conf/deployment.yml --jobs=telco-churn-model-deployment --environment=prod
dbx deploy --deployment-file conf/deployment.yml --jobs=PROD-telco-churn-model-deployment --environment=prod
- name: Deploy telco-churn-model-inference-batch job [prod environment]
- name: Deploy PROD-telco-churn-model-inference-batch job [prod environment]
run: |
dbx deploy --deployment-file conf/deployment.yml --jobs=telco-churn-model-inference-batch --environment=prod
dbx deploy --deployment-file conf/deployment.yml --jobs=PROD-telco-churn-model-inference-batch --environment=prod
- name: Create Release
id: create_release
Expand Down
1 change: 0 additions & 1 deletion conf/.staging.env

This file was deleted.

112 changes: 56 additions & 56 deletions conf/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ custom:
# Cluster configs for each environment
default-cluster-spec: &default-cluster-spec
spark_version: '11.0.x-cpu-ml-scala2.12'
node_type_id: 'i3.xlarge'
driver_node_type_id: 'i3.xlarge'
# node_type_id: 'i3.xlarge'
# driver_node_type_id: 'i3.xlarge'
num_workers: 1
# To reduce start up time for each job, it is advisable to use a cluster pool. To do so involves supplying the following
# two fields with a pool_id to acquire both the driver and instances from.
# If driver_instance_pool_id and instance_pool_id are set, both node_type_id and driver_node_type_id CANNOT be supplied.
# As such, if providing a pool_id for driver and worker instances, please ensure that node_type_id and driver_node_type_id are not present
# driver_instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
# instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
driver_instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'
instance_pool_id: '0617-151415-bells2-pool-hh7h6tjm'

dev-cluster-config: &dev-cluster-config
new_cluster:
Expand All @@ -32,124 +32,124 @@ environments:
dev:
strict_path_adjustment_policy: true
jobs:
- name: 'telco-churn-demo-setup'
- name: 'DEV-telco-churn-demo-setup'
<<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/demo_setup_job.py'
python_file: 'file://telco_churn/pipelines/demo_setup_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.dev.env',
'--conf-file', 'file:fuse://conf/job_configs/demo_setup.yml']
- name: 'telco-churn-feature-table-creation'
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/demo_setup.yml']
- name: 'DEV-telco-churn-feature-table-creation'
<<: *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/feature_table_creator_job.py'
python_file: 'file://telco_churn/pipelines/feature_table_creator_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.dev.env',
'--conf-file', 'file:fuse://conf/job_configs/feature_table_creation.yml']
- name: 'telco-churn-model-train'
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/feature_table_creator.yml']
- name: 'DEV-telco-churn-model-train'
<<:
- *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/model_train_job.py'
python_file: 'file://telco_churn/pipelines/model_train_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.dev.env',
'--conf-file', 'file:fuse://conf/job_configs/model_train.yml']
- name: 'telco-churn-model-deployment'
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_train.yml']
- name: 'DEV-telco-churn-model-deployment'
<<:
- *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/model_deployment_job.py'
python_file: 'file://telco_churn/pipelines/model_deployment_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.dev.env',
'--conf-file', 'file:fuse://conf/job_configs/model_deployment.yml']
- name: 'telco-churn-model-inference-batch'
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_deployment.yml']
- name: 'DEV-telco-churn-model-inference-batch'
<<:
- *dev-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/model_inference_job.py'
python_file: 'file://telco_churn/pipelines/model_inference_batch_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.dev.env',
'--conf-file', 'file:fuse://conf/job_configs/model_inference_batch.yml']
- name: 'telco-churn-sample-integration-test'
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_inference_batch.yml']
- name: 'DEV-telco-churn-sample-integration-test'
<<:
- *dev-cluster-config
spark_python_task:
python_file: 'file://tests/integration/sample_test.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.dev.env',
'--conf-file', 'file:fuse://conf/job_configs/sample_test.yml' ]
'--env', 'file:fuse://conf/dev/.dev.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/sample_test.yml']

staging:
strict_path_adjustment_policy: true
jobs:
- name: 'telco-churn-sample-integration-test'
- name: 'STAGING-telco-churn-sample-integration-test'
<<:
- *staging-cluster-config
spark_python_task:
python_file: 'file://tests/integration/sample_test.py'
parameters: ['--env', 'file:fuse://conf/.staging.env',
'--conf-file', 'file:fuse://conf/job_configs/sample_test.yml' ]
parameters: ['--env', 'file:fuse://conf/staging/.staging.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/sample_test.yml']

prod:
strict_path_adjustment_policy: true
jobs:
- name: 'telco-churn-demo-setup'
- name: 'PROD-telco-churn-demo-setup'
<<: *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/demo_setup_job.py'
python_file: 'file://telco_churn/pipelines/demo_setup_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/demo_setup.yml' ]
- name: 'telco-churn-initial-model-train-register'
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/demo_setup.yml']
- name: 'PROD-telco-churn-initial-model-train-register'
tasks:
- task_key: 'demo-setup'
<<:
- *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/demo_setup_job.py'
python_file: 'file://telco_churn/pipelines/demo_setup_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/demo_setup.yml' ]
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/demo_setup.yml']
- task_key: 'feature-table-creation'
<<: *prod-cluster-config
depends_on:
- task_key: 'demo-setup'
spark_python_task:
python_file: 'file://telco_churn/jobs/feature_table_creator_job.py'
python_file: 'file://telco_churn/pipelines/feature_table_creator_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/feature_table_creation.yml']
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/feature_table_creator.yml']
- task_key: 'model-train'
<<: *prod-cluster-config
depends_on:
- task_key: 'demo-setup'
- task_key: 'feature-table-creation'
spark_python_task:
python_file: 'file://telco_churn/jobs/model_train_job.py'
python_file: 'file://telco_churn/pipelines/model_train_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/model_train.yml']
- name: 'telco-churn-model-train'
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_train.yml']
- name: 'PROD-telco-churn-model-train'
<<:
- *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/model_train_job.py'
python_file: 'file://telco_churn/pipelines/model_train_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/model_train.yml']
- name: 'telco-churn-model-deployment'
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_train.yml']
- name: 'PROD-telco-churn-model-deployment'
<<:
- *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/model_deployment_job.py'
python_file: 'file://telco_churn/pipelines/model_deployment_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/model_deployment.yml']
- name: 'telco-churn-model-inference-batch'
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_deployment.yml']
- name: 'PROD-telco-churn-model-inference-batch'
<<:
- *prod-cluster-config
spark_python_task:
python_file: 'file://telco_churn/jobs/model_inference_job.py'
python_file: 'file://telco_churn/pipelines/model_inference_batch_job.py'
parameters: ['--base-data-params', 'file:fuse://conf/.base_data_params.env',
'--env', 'file:fuse://conf/.prod.env',
'--conf-file', 'file:fuse://conf/job_configs/model_inference_batch.yml']
'--env', 'file:fuse://conf/prod/.prod.env',
'--conf-file', 'file:fuse://conf/pipeline_configs/model_inference_batch.yml']
8 changes: 2 additions & 6 deletions conf/.dev.env → conf/dev/.dev.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
DEPLOYMENT_ENV=dev
env=dev

// Global MLflow params for dev
model_train_experiment_path='/Shared/e2e_mlops/dev/telco_churn_experiment_dev'
Expand All @@ -13,13 +13,9 @@ labels_table_database_name='e2e_mlops_dev'
//tmp directory for demo purposes
labels_table_dbfs_path='dbfs:/tmp/e2e_mlops/dev/churn_labels.delta'

// Batch inference input table params
inference_database_name='e2e_mlops_dev'

// Batch inference predictions table params
//tmp directory for demo purposes
predictions_table_dbfs_path='dbfs:/tmp/e2e_mlops/dev/churn_predictions.delta'
predictions_table_database_name='e2e_mlops_dev'
predictions_table_name='churn_predictions'

// Reference table params - table to use for comparing staging vs production models
reference_table_database_name='e2e_mlops_dev'
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 3 additions & 7 deletions conf/.prod.env → conf/prod/.prod.env
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
DEPLOYMENT_ENV=prod
env=prod

// Global MLflow params for prod
model_train_experiment_path='/Shared/e2e_mlops/prod/telco_churn_experiment_prod'
Expand All @@ -13,13 +13,9 @@ labels_table_database_name='e2e_mlops_prod'
// tmp directory for demo purposes
labels_table_dbfs_path='dbfs:/tmp/e2e_mlops/prod/churn_labels.delta'

// Batch inference input table params
inference_database_name='e2e_mlops_prod'

// Batch inference predictions table params
// tmp directory for demo purposes
predictions_table_dbfs_path='dbfs:/tmp/e2e_mlops/prod/churn_predictions.delta'
predictions_table_database_name='e2e_mlops_prod'
predictions_table_database_name='e2e_mlops_prod'
predictions_table_name='churn_predictions'

// Reference table params - table to use for comparing staging vs production models
reference_table_database_name='e2e_mlops_prod'
Expand Down
1 change: 1 addition & 0 deletions conf/staging/.staging.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
env=staging
Loading

0 comments on commit 313e863

Please sign in to comment.