-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathfabfile.py
89 lines (72 loc) · 2.99 KB
/
fabfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import yaml
from fabric import network
from fabric.api import env, run
from fabric.state import connections
# Load in server config, here's a quick example:
#     role_name:
#         hosts:
#         broker_url: pyamqp://user:pass@host:port/vhost-gpu
#         broker_use_ssl: false
#         is_gpu: true
#         docker_image: codalab/competitions-v2-compute-worker:nvidia
#
# You select the role to run with like so:
#     $ fab -R role_name <command>
# Parse the role definitions with safe_load: a bare yaml.load() call requires an
# explicit Loader on current PyYAML releases and, on older ones, may construct
# arbitrary Python objects from tagged input. The context manager closes the
# file handle as soon as the config has been parsed.
with open('server_config.yaml') as config_file:
    # An empty config file parses to None; fall back to an empty mapping so
    # env.roledefs is always a dict.
    env.roledefs = yaml.safe_load(config_file) or {}
# ----------------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------------
def _reconnect_current_host():
    """Closes all open connections, then connects again to the current host."""
    network.disconnect_all()
    # Build the "host:port" string for the host the running task targets
    host = env.host
    host_with_port = host + ':%s' % env.port
    connections.connect(host_with_port)
# ----------------------------------------------------------------------------
# Tasks
# ----------------------------------------------------------------------------
def setup():
    """Installs docker on the machine so it meets the compute worker requirements"""
    provisioning_commands = (
        # Docker's convenience install script
        "curl -sSL https://get.docker.com/ | sh",
        # Add the login user to the docker group
        "sudo usermod -aG docker $USER",
    )
    for command in provisioning_commands:
        run(command)
    # Open a fresh connection afterwards (presumably so the new group
    # membership is picked up by later commands -- TODO confirm)
    _reconnect_current_host()
def status():
    """Lists every docker container on the server, running or not"""
    list_containers = "docker ps -a"
    run(list_containers)
def _build_worker_command(broker_url, broker_use_ssl, is_gpu, docker_image):
    """Returns the shell command line that starts the compute worker container.

    The leading underscore keeps Fabric from exposing this helper as a task.

    :param broker_url: value passed to the container as BROKER_URL
    :param broker_use_ssl: value passed to the container as BROKER_USE_SSL
    :param is_gpu: when truthy, use nvidia-docker and mount the nvidia-docker socket
    :param docker_image: image to run
    """
    from shlex import quote

    parts = [
        "nvidia-docker" if is_gpu else "docker",
        "run",
        "-v /tmp/codabench:/codabench",
        "-v /var/run/docker.sock:/var/run/docker.sock",
    ]
    if is_gpu:
        # Ensure the nvidia socket is attached if we're in gpu mode
        parts.append(
            "-v /var/lib/nvidia-docker/nvidia-docker.sock:/var/lib/nvidia-docker/nvidia-docker.sock"
        )
    parts += [
        "-d",
        # The broker URL embeds credentials, which may contain shell
        # metacharacters (&, ;, spaces, ...), so quote the whole assignment.
        "--env " + quote(f"BROKER_URL={broker_url}"),
        f"--env BROKER_USE_SSL={broker_use_ssl}",
        "--restart unless-stopped",
        "--log-opt max-size=50m",
        "--log-opt max-file=3",
        docker_image,
    ]
    return " ".join(parts)


def update():
    """Updates docker image on server and restarts the worker, based on server_config.yaml settings.

    See README (Worker management section) for example settings.

    Exactly one role must be selected (e.g. ``fab -R role_name update``);
    otherwise an error is printed and nothing is run on the server. The role
    must define ``broker_url``, ``is_gpu`` and ``docker_image``;
    ``broker_use_ssl`` is optional and defaults to False.

    Note that ALL docker containers on the server are stopped and removed
    before the worker is started again.
    """
    # Ensure that we're referencing one and only 1 role, so we don't accidentally update CPUs to GPUs
    if not env.effective_roles:
        print("ERROR: You must specify a role when running this task. I.e. fab -R some-gpu task_name`")
        return
    if len(env.effective_roles) > 1:
        print("ERROR: Only specify 1 role (because we need 1 broker_url) when running this task")
        return

    # Read settings from our server_config.yaml config
    role = env.effective_roles[0]
    settings = env.roledefs[role]
    docker_image = settings["docker_image"]
    docker_command = _build_worker_command(
        broker_url=settings["broker_url"],
        # Optional key: configs without it keep working with SSL disabled
        broker_use_ssl=bool(settings.get("broker_use_ssl", False)),
        is_gpu=settings["is_gpu"],
        docker_image=docker_image,
    )

    # Stop and remove containers. `xargs -r` skips the docker call when there
    # are no containers; a bare `docker stop $(docker ps -aq)` exits non-zero
    # in that case and would abort the task on a fresh server.
    run("docker ps -aq | xargs -r docker stop")
    run("docker ps -aq | xargs -r docker rm")

    # Make sure we have latest image
    run(f"docker pull {docker_image}")
    run(docker_command)