From c8d7fb360f166cc9421cf252d1081db167c5ff84 Mon Sep 17 00:00:00 2001 From: Christian Ariza Date: Tue, 2 Jun 2020 15:15:22 +0200 Subject: [PATCH] [k8s] Change the required resources Reducing the required resources, adding a shared redis storage, and adding RuntimeError to the exceptions that trigger an automatic retry in the query_schedd task. --- k8s/deployments/spider-flower.yaml | 8 +++++++- k8s/deployments/spider-redis-cp.yaml | 4 ++-- k8s/deployments/spider-redis.yaml | 3 ++- k8s/deployments/spider-worker.yaml | 4 ++-- k8s/storages/shared_redis.yml | 12 ++++++++++++ src/htcondor_es/celery/tasks.py | 2 +- 6 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 k8s/storages/shared_redis.yml diff --git a/k8s/deployments/spider-flower.yaml b/k8s/deployments/spider-flower.yaml index d9b6477..1af02a9 100644 --- a/k8s/deployments/spider-flower.yaml +++ b/k8s/deployments/spider-flower.yaml @@ -25,6 +25,12 @@ spec: name: spider-flower ports: - containerPort: 8888 - resources: {} + resources: + requests: + cpu: 100m + memory: 500Mi + limits: + cpu: 300m + memory: 1Gi restartPolicy: Always status: {} \ No newline at end of file diff --git a/k8s/deployments/spider-redis-cp.yaml b/k8s/deployments/spider-redis-cp.yaml index f28d88f..451440e 100644 --- a/k8s/deployments/spider-redis-cp.yaml +++ b/k8s/deployments/spider-redis-cp.yaml @@ -30,11 +30,11 @@ spec: - containerPort: 6379 resources: requests: - cpu: 500m + cpu: 100m memory: 500Mi limits: cpu: 1000m - memory: 4Gi + memory: 1Gi volumeMounts: - mountPath: /data name: redis-checkpoint-claim0 diff --git a/k8s/deployments/spider-redis.yaml b/k8s/deployments/spider-redis.yaml index c910360..165406c 100644 --- a/k8s/deployments/spider-redis.yaml +++ b/k8s/deployments/spider-redis.yaml @@ -37,5 +37,6 @@ spec: restartPolicy: Always volumes: - name: redis-claim0 - emptyDir: {} + persistentVolumeClaim: + claimName: redis-cephfs-claim status: {} \ No newline at end of file diff --git a/k8s/deployments/spider-worker.yaml 
b/k8s/deployments/spider-worker.yaml index daffd60..daf9562 100644 --- a/k8s/deployments/spider-worker.yaml +++ b/k8s/deployments/spider-worker.yaml @@ -28,10 +28,10 @@ spec: resources: requests: cpu: 100m - memory: 200Mi + memory: 500Mi limits: cpu: 2000m - memory: 1Gi + memory: 2Gi env: &spider_env - name: AFFILIATION_DIR_LOCATION value: /cms_shared/affiliation_dir.json diff --git a/k8s/storages/shared_redis.yml b/k8s/storages/shared_redis.yml new file mode 100644 index 0000000..7319692 --- /dev/null +++ b/k8s/storages/shared_redis.yml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: redis-cephfs-claim + namespace: spider +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: 1G + storageClassName: meyrin-cephfs \ No newline at end of file diff --git a/src/htcondor_es/celery/tasks.py b/src/htcondor_es/celery/tasks.py index 7b920bf..1482a56 100644 --- a/src/htcondor_es/celery/tasks.py +++ b/src/htcondor_es/celery/tasks.py @@ -51,7 +51,7 @@ # ---Tasks---- -@app.task(max_retries=3, serializer="pickle") +@app.task(max_retries=3, autoretry_for=(RuntimeError,), serializer="pickle") def query_schedd( schedd_ad, start_time=None,