-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdatabricks.yml
107 lines (96 loc) · 3.41 KB
/
databricks.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
---
# This is a Databricks asset bundle definition
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
#
# NOTE(review): indentation was lost when this file was extracted; the nesting
# below is reconstructed from the Databricks Asset Bundles schema. Top-level
# placement of artifacts/sync/resources (vs. nesting under targets.demo) is
# assumed — confirm against the original repository.
bundle:
  name: microsoft_wwi_demo

# Grant every workspace user full control of the deployed resources.
permissions:
  - level: "CAN_MANAGE"
    group_name: users

workspace:
  # Deploy under a per-user folder, isolated by bundle name and target.
  root_path: /Shared/${workspace.current_user.userName}/${bundle.name}/${bundle.target}

variables:
  dashboard_warehouse_id:
    type: string
    # Resolved at deploy time by looking the warehouse up by display name.
    lookup:
      warehouse: "Serverless Starter Warehouse"

# Do we need targets? Make sense to leave for future modifications
targets:
  demo:
    default: true

# Need to specify when wheel package is not pre-built, otherwise comment this block
artifacts:
  raw:
    type: whl
    build: poetry build
    path: ./raw
  data_product:
    type: whl
    build: poetry build
    path: ./data_product

# For passing wheel package to workspace
sync:
  include:
    - ./raw/dist/*.whl
    - ./data_product/dist/*.whl

resources:
  jobs:
    microsoft_wwi_demo:
      name: microsoft_wwi_demo
      job_clusters:
        - job_cluster_key: microsoft_wwi_job_cluster
          new_cluster:
            node_type_id: m5d.2xlarge
            spark_version: 14.3.x-scala2.12
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              spot_bid_price_percent: 100
              ebs_volume_count: 2
              ebs_volume_type: GENERAL_PURPOSE_SSD
              ebs_volume_size: 100
            spark_conf:
              # Quoted so Spark receives the string "true", not a YAML boolean.
              spark.databricks.delta.preview.enabled: "true"
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: false
            data_security_mode: SINGLE_USER
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: 3
      tasks:
        # Ingestion task: runs first and lands raw WWI data.
        - task_key: raw
          job_cluster_key: microsoft_wwi_job_cluster
          notebook_task:
            notebook_path: ${workspace.root_path}/files/raw/bundle/databricks-notebooks/raw-microsoft-wwi/raw-microsoft-wwi
            source: WORKSPACE
            base_parameters:
              catalog: wwi_demo
              schema: 01_raw_microsoft_wwi
              load_mode: incremental
              load_datetime: "2015-01-01T00:00:00"
              # Quoted: a leading "$" is legal in a plain scalar but easy to misread.
              streaming_consumer_group: "$Default"
          libraries:
            - whl: ${workspace.root_path}/artifacts/.internal/raw_microsoft_wwi-1.4.0-py3-none-any.whl
            - maven:
                coordinates: com.microsoft.azure:spark-mssql-connector_2.12:1.2.0
        # Data-product task: builds the serving schema from the raw tables.
        - task_key: data_product
          job_cluster_key: microsoft_wwi_job_cluster
          depends_on:
            - task_key: raw
          notebook_task:
            notebook_path: ${workspace.root_path}/files/data_product/bundle/databricks-notebooks/data-product-microsoft-wwi/data-product-microsoft-wwi
            source: WORKSPACE
            base_parameters:
              catalog: wwi_demo
              source_tables_schema: 01_raw_microsoft_wwi
              destination_schema: 02_data_product_microsoft_wwi
          libraries:
            - whl: ${workspace.root_path}/artifacts/.internal/data_product_microsoft_wwi-1.6.0-py3-none-any.whl
            - maven:
                coordinates: com.microsoft.azure:spark-mssql-connector_2.12:1.2.0
  dashboards:
    microsoft_wwi:
      display_name: "Microsoft WWI"
      # Warehouse id comes from the lookup variable defined above.
      warehouse_id: ${var.dashboard_warehouse_id}
      file_path: ./dashboards/microsoft_wwi.lvdash.json