diff --git a/docker/cron-docker/periodic/15min/restart_nassl_worker b/docker/cron-docker/periodic/15min/restart_nassl_worker
index 1293befa5c..5542b576b3 100755
--- a/docker/cron-docker/periodic/15min/restart_nassl_worker
+++ b/docker/cron-docker/periodic/15min/restart_nassl_worker
@@ -1,4 +1,8 @@
 #!/bin/sh
 set -e
-# find nassl worker and restart the container(s)
-docker ps --filter label=com.docker.compose.service=worker-nassl --quiet | xargs --no-run-if-empty docker restart
+# stop and start workers one at a time to ensure (batch) tasks are still being picked up
+# docker stop sends a TERM signal and allows a 10 minute grace period before KILL is sent
+for worker in $(docker ps --filter label=com.docker.compose.service=worker-nassl --quiet); do
+  docker stop "$worker"
+  docker start "$worker"
+done
diff --git a/docker/cron-docker/periodic/daily/restart_slow_worker b/docker/cron-docker/periodic/daily/restart_slow_worker
new file mode 100755
index 0000000000..a289a66561
--- /dev/null
+++ b/docker/cron-docker/periodic/daily/restart_slow_worker
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# restart slow worker every day to prevent slow memory leaks
+# as the slow worker can run very long tasks (eg: report generation)
+# we first start a new container before stopping the previous one
+#
+# requires WORKER_SLOW_REPLICAS in the environment (number of replicas to keep running)
+
+set -e
+
+cd /opt/Internet.nl
+
+SERVICE=worker-slow
+# abort early with a clear message when unset or empty, instead of failing halfway on `$(( * 2))`
+REPLICAS=${WORKER_SLOW_REPLICAS:?must be set to the number of worker-slow replicas}
+
+# run docker compose with the docker/*.env files, passing all arguments through unchanged
+compose() {
+  docker compose --env-file=docker/defaults.env --env-file=docker/host.env --env-file=docker/local.env "$@"
+}
+
+# containers of the service before scaling up; grep exits non-zero without a match,
+# which must not abort the script (set -e) before new containers are started
+OLD_CONTAINERS=$(compose ps --format "{{ .Name }}" | grep -- "$SERVICE" || true)
+
+# bring up new containers, wait until healthy
+compose up --no-deps --no-recreate --wait --scale="$SERVICE=$((REPLICAS * 2))" "$SERVICE"
+
+# graceful shutdown and remove old containers: `docker stop` sends the stop signal and
+# waits for the grace period, whereas `docker rm --force` would kill running tasks
+# $OLD_CONTAINERS is intentionally unquoted, it holds one container name per line
+for container in $OLD_CONTAINERS; do
+  docker stop "$container"
+  docker rm "$container"
+done
+
+# restore replica number to original
+compose scale "$SERVICE=$REPLICAS"
diff --git a/docker/cron-docker/periodic/hourly/restart_worker b/docker/cron-docker/periodic/hourly/restart_worker
index 580e1b50e2..99540a2af5 100755
--- a/docker/cron-docker/periodic/hourly/restart_worker
+++ b/docker/cron-docker/periodic/hourly/restart_worker
@@ -1,4 +1,8 @@
 #!/bin/sh
 set -e
-# find worker and restart the container(s)
-docker ps --filter label=com.docker.compose.service=worker --quiet | xargs --no-run-if-empty docker restart
+# stop and start workers one at a time to ensure (batch) tasks are still being picked up
+# docker stop sends a TERM signal and allows a 10 minute grace period before KILL is sent
+for worker in $(docker ps --filter label=com.docker.compose.service=worker --quiet); do
+  docker stop "$worker"
+  docker start "$worker"
+done
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 69c0ef70b5..2c9b5641cc 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -249,6 +249,8 @@ services:
     # time after which a SIGKILL is sent to celery after a SIGTERM (warm shutdown), default 10s
     # insufficient short grace period causes issues on batch when tasks are killed during the hourly worker restart
     stop_grace_period: 10m
+    # SIGTERM is default, but make it explicit
+    stop_signal: SIGTERM

     depends_on:
       db-migrate:
@@ -735,6 +737,9 @@ services:
     command: crond -f -d7 -c /etc/crontabs-docker
     environment:
       - AUTO_UPDATE_TO
+      - WORKER_SLOW_REPLICAS
+      - WORKER_REPLICAS
+      - RELEASE

     restart: unless-stopped
     logging: