Skip to content

Commit

Permalink
Add multi-node on cloud
Browse files Browse the repository at this point in the history
  • Loading branch information
satyaog committed Apr 3, 2024
1 parent a18a034 commit 5267334
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 17 deletions.
36 changes: 36 additions & 0 deletions config/examples/cloud-multinodes-system.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
system:
# Nodes list
nodes:
# Alias used to reference the node
- name: manager
# Use 1.1.1.1 as an IP placeholder: the cloud command only acts on nodes whose ip is this placeholder
ip: 1.1.1.1
# Whether this node is the main (master) node
main: true
# User to use in remote milabench operations
user: user

- name: node1
ip: 1.1.1.1
main: false
user: username

# Cloud instance profiles
cloud_profiles:
# Profile key in the form {PLATFORM} or {PLATFORM}__{PROFILE_NAME}; the part before "__" selects the cloud platform
azure:
# covalent-azure-plugin args
username: ubuntu
size: Standard_B1s
location: eastus2
azure__free:
username: ubuntu
size: Standard_B2ats_v2
location: eastus2
ec2:
# covalent-ec2-plugin args
username: ubuntu
instance_type: t2.micro
volume_size: 8
region: us-east-2
state_id: 71669879043a3864225aabb94f91a2d4
22 changes: 9 additions & 13 deletions milabench/cli/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def _flatten_cli_args(**kwargs):
return sum(
(
(f"--{k.replace('_', '-')}", *([v] if v else []))
(f"--{str(k).replace('_', '-')}", *([str(v)] if str(v) else []))
for k, v in kwargs.items()
), ()
)
Expand All @@ -35,21 +35,17 @@ def manage_cloud(pack, run_on, action="setup"):
"env":(lambda v: ("env",[".", v, ";", "conda", "activate", "milabench", "&&"])),
}
plan_params = deepcopy(pack.config["system"]["cloud_profiles"][run_on])
run_on, *profile = run_on.split("__")
profile = profile[0] if profile else ""

nodes = iter(enumerate(pack.config["system"]["nodes"]))
for i, n in nodes:
if n["ip"] != "1.1.1.1":
continue

while True:
try:
i, n = next(nodes)
if n["ip"] != "1.1.1.1":
continue
except StopIteration:
break

plan_params["state_prefix"] = plan_params.get("state_prefix", None) or "-".join([str(i), run_on])
plan_params["state_prefix"] = plan_params.get("state_prefix", None) or profile or run_on
plan_params["state_id"] = plan_params.get("state_id", None) or pack.config["hash"]
if i > 0:
plan_params["reuse_resource_group"] = None
plan_params["cluster_size"] = max(len(pack.config["system"]["nodes"]), i + 1)

import milabench.cli.covalent as cv

Expand All @@ -66,7 +62,7 @@ def manage_cloud(pack, run_on, action="setup"):
cmd = [
sys.executable,
"-m", cv.__name__,
run_on.split("__")[0],
run_on,
f"--{action}",
*_flatten_cli_args(**plan_params)
]
Expand Down
9 changes: 5 additions & 4 deletions milabench/cli/covalent/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,11 @@ def lattice(argv=(), deps_bash = None):
)
asyncio.run(_executor.setup({}))

assert _executor.hostname
print(f"hostname::>{_executor.hostname}")
print(f"username::>{_executor.username}")
print(f"ssh_key_file::>{_executor.ssh_key_file}")
assert _executor.hostnames
for hostname in _executor.hostnames:
print(f"hostname::>{hostname}")
print(f"username::>{_executor.username}")
print(f"ssh_key_file::>{_executor.ssh_key_file}")
finally:
result = ct.get_result(dispatch_id=dispatch_id, wait=False) if dispatch_id else None
results_dir = result.results_dir if result else ""
Expand Down

0 comments on commit 5267334

Please sign in to comment.