From 9d00d2e1a3154f82cbda022f0af454b259a458a2 Mon Sep 17 00:00:00 2001 From: Ran Lu Date: Fri, 28 Jun 2024 15:33:29 -0400 Subject: [PATCH] Run oom_detector within host network Since we extended oom_detector to send a heartbeat when there is significant network traffic, we need to expose the host network to it so that it can check the network I/O of other containers. --- cloud/google/workers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/google/workers.py b/cloud/google/workers.py index 10d5c340..ed340bf3 100644 --- a/cloud/google/workers.py +++ b/cloud/google/workers.py @@ -88,7 +88,7 @@ def GenerateWorkers(context, hostname_manager, hostname_nfs_server, worker): docker_image = worker.get('workerImage', context.properties['seuronImage']) - oom_canary_cmd = GenerateDockerCommand(docker_image, docker_env) + ' ' + "python utils/memory_monitor.py ${AIRFLOW__CELERY__BROKER_URL} bot-message-queue >& /dev/null" + oom_canary_cmd = GenerateDockerCommand(docker_image, docker_env + ['--network host']) + ' ' + "python utils/memory_monitor.py ${AIRFLOW__CELERY__BROKER_URL} bot-message-queue >& /dev/null" if worker['type'] == 'gpu': cmd = GenerateCeleryWorkerCommand(docker_image, docker_env+['-p 8793:8793'], queue=worker['type'], concurrency=worker['concurrency'])