# tutorial-starccm-slurm.yaml
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
# Blueprint identity, followed by the toolkit module source and the release
# tag this blueprint is written against.
blueprint_name: "starccm-on-slurm"
toolkit_modules_url: "github.com/GoogleCloudPlatform/cluster-toolkit"
toolkit_modules_version: "v1.41.0"
# Deployment-wide variables; referenced elsewhere in this blueprint as
# $(vars.<name>).
vars:
  # Must be filled in before deploying: everything after the key is a YAML
  # comment, so the value is empty (null) until a project ID is supplied.
  project_id:  ## Set GCP Project ID Here ##
  deployment_name: starccm-slurm
  # The zone below lies inside this region; change the two together.
  region: us-central1
  zone: us-central1-c
# Terraform provider pins. Both providers take the same project/region/zone
# configuration from the deployment variables and are pinned to the same
# release.
terraform_providers:
  google:
    source: hashicorp/google
    # Quoted so the version can never be re-typed as a number.
    version: "5.45.0"
    configuration:
      project: $(vars.project_id)
      region: $(vars.region)
      zone: $(vars.zone)

  google-beta:
    source: hashicorp/google-beta
    version: "5.45.0"
    configuration:
      project: $(vars.project_id)
      region: $(vars.region)
      zone: $(vars.zone)
# Documentation for each of the modules used below can be found at
# https://github.com/GoogleCloudPlatform/cluster-toolkit/blob/main/modules/README.md
# A single deployment group containing the network, the shared /home file
# system, the startup scripts, two Slurm partitions (debug and compute), the
# Slurm controller and the login node.
deployment_groups:
- group: primary
  modules:
  # Source is an embedded module, denoted by "modules/*" without ./, ../, /
  # as a prefix. To refer to a local module, prefix with ./, ../ or /
  - id: network1
    source: modules/network/vpc

  # Filestore share mounted at /home; used by both partitions and by the
  # controller.
  - id: homefs
    source: modules/file-system/filestore
    use: [network1]
    settings:
      local_mount: /home

  # Startup script consumed by the login node (see slurm_login).
  - id: login-script
    kind: terraform
    source: modules/scripts/startup-script
    settings:
      configure_ssh_host_patterns: ["star*"]

  # Startup script consumed by both partitions. Besides the SSH host pattern
  # it carries a shell runner that invokes google_mpi_tuning twice.
  - id: compute-script
    source: modules/scripts/startup-script
    settings:
      configure_ssh_host_patterns: ["star*"]
      runners:
      - type: shell
        # Literal block scalar: the script lines must stay indented under
        # `content`, otherwise the shebang is parsed as a YAML comment.
        content: |
          #!/bin/bash
          google_mpi_tuning --hpcthroughput
          google_mpi_tuning --nomitigation
        destination: /tmp/tune-mpi.sh

  # Debug partition: up to 4 dynamically created n2-standard-2 nodes.
  - id: debug_node_group
    source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
    settings:
      node_count_dynamic_max: 4
      machine_type: n2-standard-2

  - id: debug_partition
    source: community/modules/compute/schedmd-slurm-gcp-v5-partition
    use:
    - network1
    - homefs
    - debug_node_group
    - compute-script
    settings:
      partition_name: debug
      is_default: true

  # Compute partition: up to 20 dynamically created c2-standard-60 nodes with
  # the gVNIC bandwidth tier.
  - id: compute_node_group
    source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
    settings:
      bandwidth_tier: "gvnic_enabled"
      # NOTE(review): compute nodes keep public IPs here, while the controller
      # and login node below disable theirs - confirm this is intentional.
      disable_public_ips: false
      machine_type: c2-standard-60
      node_count_dynamic_max: 20

  - id: compute_partition
    source: community/modules/compute/schedmd-slurm-gcp-v5-partition
    use:
    - network1
    - homefs
    - compute_node_group
    - compute-script
    settings:
      partition_name: compute

  # Controller for both partitions; no public IP.
  - id: slurm_controller
    source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
    use:
    - network1
    - debug_partition
    - compute_partition
    - homefs
    settings:
      disable_controller_public_ips: true

  # Login node attached to the controller; runs login-script and has no
  # public IP.
  - id: slurm_login
    source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
    use:
    - network1
    - slurm_controller
    - login-script
    settings:
      machine_type: n2-standard-4
      disable_login_public_ips: true