diff --git a/deployments/kubernetes/monitoring/grafana/deployment.yml b/deployments/kubernetes/monitoring/grafana/deployment.yml
new file mode 100644
index 00000000..b8ec5863
--- /dev/null
+++ b/deployments/kubernetes/monitoring/grafana/deployment.yml
@@ -0,0 +1,75 @@
+---
+# Grafana Deployment; the Jinja2 placeholders are rendered by Ansible's template lookup.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: grafana
+  namespace: monitoring
+  labels:
+    app: grafana
+spec:
+  replicas: 1
+  strategy:
+    type: RollingUpdate
+  selector:
+    matchLabels:
+      app: grafana
+  template:
+    metadata:
+      labels:
+        app: grafana
+    spec:
+      nodeSelector:
+        env: master
+      volumes:
+        - name: grafana-volume
+          emptyDir: {}  # ephemeral: data is lost with the pod; switch to the PVC below to persist
+          # persistentVolumeClaim:
+          #   claimName: grafana-volume
+      containers:
+        - name: grafana
+          image: "{{ grafana_container_image }}"
+          imagePullPolicy: "{{ image_pull_policy }}"
+          ports:
+            - name: http
+              containerPort: 3000
+          volumeMounts:
+            - name: grafana-volume
+              mountPath: /var/lib/grafana
+          env:
+            - name: GF_PATHS_LOGS
+              value: /var/log/grafana/
+            - name: GF_LOG_MODE
+              value: "console file"
+            - name: GF_SERVER_DOMAIN
+              value: "{{ service_hostname }}"
+            - name: GF_SERVER_ROOT_URL
+              value: "https://{{ service_hostname }}/grafana"
+            - name: GF_SERVER_SERVE_FROM_SUB_PATH
+              value: "true"
+            - name: GF_SERVER_HTTP_PORT
+              value: "3000"
+            - name: GF_AUTH_GITHUB_ENABLED
+              value: "true"
+            - name: GF_AUTH_GITHUB_ALLOW_SIGN_UP
+              value: "true"
+            - name: GF_AUTH_GITHUB_ALLOWED_ORGANIZATIONS
+              value: "cmu-db"
+            - name: GF_SECURITY_ADMIN_USER
+              value: "admin"
+            - name: GF_SECURITY_ADMIN_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: secrets-k8s-master
+                  key: gf_admin_password
+            # GitHub login (OAuth client credentials are read from the cluster secret)
+            - name: GF_AUTH_GITHUB_CLIENT_ID
+              valueFrom:
+                secretKeyRef:
+                  name: secrets-k8s-master
+                  key: gf_auth_github_client_id
+            - name: GF_AUTH_GITHUB_CLIENT_SECRET
+              valueFrom:
+                secretKeyRef:
+                  name: secrets-k8s-master
+                  key: gf_auth_github_client_secret
diff --git a/deployments/kubernetes/monitoring/grafana/service.yml b/deployments/kubernetes/monitoring/grafana/service.yml
new file mode 100644
index 00000000..2b3412a0
--- /dev/null
+++ b/deployments/kubernetes/monitoring/grafana/service.yml
@@ -0,0 +1,18 @@
+---
+# NodePort Service exposing Grafana's port 3000 on node port 32004.
+kind: Service
+apiVersion: v1
+metadata:
+  name: grafana-service
+  namespace: monitoring
+  labels:
+    app: grafana
+spec:
+  type: NodePort
+  selector:
+    app: grafana
+  ports:
+    - protocol: TCP
+      port: 3000
+      nodePort: 32004
+  externalTrafficPolicy: Local
diff --git a/deployments/kubernetes/monitoring/prometheus/cluster-role-binding.yml b/deployments/kubernetes/monitoring/prometheus/cluster-role-binding.yml
new file mode 100644
index 00000000..7b41cf30
--- /dev/null
+++ b/deployments/kubernetes/monitoring/prometheus/cluster-role-binding.yml
@@ -0,0 +1,14 @@
+---
+# Grants the prometheus ClusterRole to the default ServiceAccount of the monitoring namespace.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: prometheus
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: prometheus
+subjects:
+  - kind: ServiceAccount
+    name: default
+    namespace: monitoring
diff --git a/deployments/kubernetes/monitoring/prometheus/cluster-role.yml b/deployments/kubernetes/monitoring/prometheus/cluster-role.yml
new file mode 100644
index 00000000..365bb255
--- /dev/null
+++ b/deployments/kubernetes/monitoring/prometheus/cluster-role.yml
@@ -0,0 +1,29 @@
+---
+# Read-only (get/list/watch) access used by Prometheus for Kubernetes service discovery and scraping.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: prometheus
+rules:
+  - apiGroups: [""]
+    resources:
+      - nodes
+      - nodes/proxy
+      - services
+      - endpoints
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+  - apiGroups: ["extensions", "networking.k8s.io"]
+    resources:
+      - ingresses
+    verbs:
+      - get
+      - list
+      - watch
+  - nonResourceURLs:
+      - /metrics
+    verbs:
+      - get
diff --git a/deployments/kubernetes/monitoring/prometheus/config-map.yml b/deployments/kubernetes/monitoring/prometheus/config-map.yml
new file mode 100644
index 00000000..63c010ff
--- /dev/null
+++ b/deployments/kubernetes/monitoring/prometheus/config-map.yml
@@ -0,0 +1,115 @@
+---
+# Prometheus server configuration; the Prometheus Deployment mounts it at /etc/prometheus/.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-server-conf
+  labels:
+    name: prometheus-server-conf
+  namespace: monitoring
+data:
+  prometheus.yml: |-
+    global:
+      scrape_interval: 5s
+      evaluation_interval: 5s
+
+    scrape_configs:
+      - job_name: 'kubernetes-apiservers'
+        kubernetes_sd_configs:
+          - role: endpoints
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+            action: keep
+            regex: default;kubernetes;https
+
+      - job_name: 'kubernetes-nodes'
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+        kubernetes_sd_configs:
+          - role: node
+        relabel_configs:
+          - action: labelmap
+            regex: __meta_kubernetes_node_label_(.+)
+          - target_label: __address__
+            replacement: kubernetes.default.svc:443
+          - source_labels: [__meta_kubernetes_node_name]
+            regex: (.+)
+            target_label: __metrics_path__
+            replacement: /api/v1/nodes/${1}/proxy/metrics
+
+      - job_name: 'kubernetes-pods'
+        kubernetes_sd_configs:
+          - role: pod
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+            action: replace
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+            target_label: __address__
+          - action: labelmap
+            regex: __meta_kubernetes_pod_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: kubernetes_pod_name
+
+      - job_name: 'kubernetes-cadvisor'
+        scheme: https
+        tls_config:
+          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+        kubernetes_sd_configs:
+          - role: node
+        relabel_configs:
+          - action: labelmap
+            regex: __meta_kubernetes_node_label_(.+)
+          - target_label: __address__
+            replacement: kubernetes.default.svc:443
+          - source_labels: [__meta_kubernetes_node_name]
+            regex: (.+)
+            target_label: __metrics_path__
+            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
+
+      - job_name: 'kubernetes-service-endpoints'
+        kubernetes_sd_configs:
+          - role: endpoints
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+            action: keep
+            regex: true
+          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+            action: replace
+            target_label: __scheme__
+            regex: (https?)
+          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+            action: replace
+            target_label: __metrics_path__
+            regex: (.+)
+          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+            action: replace
+            target_label: __address__
+            regex: ([^:]+)(?::\d+)?;(\d+)
+            replacement: $1:$2
+          - action: labelmap
+            regex: __meta_kubernetes_service_label_(.+)
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: kubernetes_namespace
+          - source_labels: [__meta_kubernetes_service_name]
+            action: replace
+            target_label: kubernetes_name
diff --git a/deployments/kubernetes/monitoring/prometheus/deployment.yml b/deployments/kubernetes/monitoring/prometheus/deployment.yml
new file mode 100644
index 00000000..b80425a0
--- /dev/null
+++ b/deployments/kubernetes/monitoring/prometheus/deployment.yml
@@ -0,0 +1,46 @@
+---
+# Single-replica Prometheus server pinned to env=master nodes; TSDB data lives in an emptyDir.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus-deployment
+  namespace: monitoring
+  labels:
+    app: prometheus-server
+spec:
+  replicas: 1
+  strategy:
+    type: RollingUpdate
+  selector:
+    matchLabels:
+      app: prometheus-server
+  template:
+    metadata:
+      labels:
+        app: prometheus-server
+    spec:
+      nodeSelector:
+        env: master
+      containers:
+        - name: prometheus
+          image: prom/prometheus:v2.2.1
+          args:
+            - "--config.file=/etc/prometheus/prometheus.yml"
+            - "--storage.tsdb.path=/prometheus/"
+            # Reverse-proxy settings: the external URL carries the /prometheus prefix, routes are served from /
+            - "--web.external-url=http://localhost:9090/prometheus"
+            - "--web.route-prefix=/"
+          ports:
+            - containerPort: 9090
+          volumeMounts:
+            - name: prometheus-config-volume
+              mountPath: /etc/prometheus/
+            - name: prometheus-storage-volume
+              mountPath: /prometheus/
+      volumes:
+        - name: prometheus-config-volume
+          configMap:
+            defaultMode: 420  # decimal form of octal 0644
+            name: prometheus-server-conf
+        - name: prometheus-storage-volume
+          emptyDir: {}
diff --git a/deployments/kubernetes/monitoring/prometheus/service.yml b/deployments/kubernetes/monitoring/prometheus/service.yml
new file mode 100644
index 00000000..a268d69e
--- /dev/null
+++ b/deployments/kubernetes/monitoring/prometheus/service.yml
@@ -0,0 +1,18 @@
+---
+# NodePort Service: node port 30000 -> service port 8080 -> Prometheus container port 9090.
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus-service
+  namespace: monitoring
+  annotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "9090"
+spec:
+  selector:
+    app: prometheus-server
+  type: NodePort
+  ports:
+    - port: 8080
+      targetPort: 9090
+      nodePort: 30000
diff --git a/deployments/kubernetes/namespaces.yml b/deployments/kubernetes/namespaces.yml
index 509a428d..7e7b09fd 100644
--- a/deployments/kubernetes/namespaces.yml
+++ b/deployments/kubernetes/namespaces.yml
@@ -8,4 +8,4 @@ metadata:
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: fuzzing
\ No newline at end of file
+  name: monitoring
\ No newline at end of file
diff --git a/deployments/playbooks/monitoring-grafana-deployment.yml b/deployments/playbooks/monitoring-grafana-deployment.yml
new file mode 100644
index 00000000..a17f6635
--- /dev/null
+++ b/deployments/playbooks/monitoring-grafana-deployment.yml
@@ -0,0 +1,28 @@
+---
+- hosts: k8s_master
+  name: Deploy Monitoring Grafana
+  vars:
+    dir_repo: "{{ inventory_dir | dirname }}"  # parent of the inventory directory
+    dir_deploy: "{{ inventory_dir }}"
+    dir_k8s_grafana: "{{ dir_deploy }}/kubernetes/monitoring/grafana"
+    service_hostname: incrudibles-k8s.db.pdl.cmu.edu
+    ansible_python_interpreter: /usr/bin/python3
+  pre_tasks:
+    - name: Ensure k8s module dependencies are installed.
+      pip:
+        name: openshift
+        state: present
+  tasks:
+    - name: Create Grafana Deployment
+      vars:
+        deployment_file: "{{ dir_k8s_grafana }}/deployment.yml"
+      k8s:
+        state: present
+        definition: "{{ lookup('template', deployment_file) }}"  # pass the variable; do not nest {{ }}
+
+    - name: Create Grafana Service
+      vars:
+        service_file: "{{ dir_k8s_grafana }}/service.yml"
+      k8s:
+        state: present
+        definition: "{{ lookup('template', service_file) }}"
diff --git a/deployments/playbooks/prometheus-deployment.yml b/deployments/playbooks/prometheus-deployment.yml
new file mode 100644
index 00000000..89ec321d
--- /dev/null
+++ b/deployments/playbooks/prometheus-deployment.yml
@@ -0,0 +1,26 @@
+---
+# Applies the Prometheus manifests (RBAC, ConfigMap, Deployment, Service) in the listed order.
+- hosts: "{{ host_override | default('k8s_master') }}"
+  name: Deploy Prometheus
+  vars:
+    dir_deployment: "{{ inventory_dir }}"
+    dir_k8s_prometheus: "{{ dir_deployment }}/kubernetes/monitoring/prometheus"
+  pre_tasks:
+    - name: Ensure k8s module dependencies are installed.
+      pip:
+        state: present
+        name: openshift
+
+  tasks:
+    - name: Apply Prometheus Deployment Configs
+      vars:
+        config: "{{ dir_k8s_prometheus }}/{{ item }}"
+      k8s:
+        state: present
+        definition: "{{ lookup('template', config) }}"  # pass the variable; do not nest {{ }}
+      loop:
+        - cluster-role.yml
+        - cluster-role-binding.yml
+        - config-map.yml
+        - deployment.yml
+        - service.yml
diff --git a/grafana/dashboards/MonitoringTesting.json b/grafana/dashboards/MonitoringTesting.json
new file mode 100644
index 00000000..1a3fb26a
--- /dev/null
+++ b/grafana/dashboards/MonitoringTesting.json
@@ -0,0 +1,235 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 0,
+  "id": 13,
+  "links": [],
+  "panels": [
+    {
+      "collapsed": false,
+      "datasource": null,
+      "gridPos": {
+        "h": 1,
+        "w": 24,
+        "x": 0,
+        "y": 0
+      },
+      "id": 6,
+      "panels": [],
+      "title": "Uptime",
+      "type": "row"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {},
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 1
+      },
+      "id": 10,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "mean"
+          ],
+          "fields": "",
+          "values": false
+        }
+      },
+      "pluginVersion": "7.0.3",
+      "targets": [
+        {
+          "expr": "time() - container_start_time_seconds{pod=~\"testing-performance-storage-service.*\",container=\"performance-storage-service\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Performance Storage Service",
+      "type": "stat"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {},
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 8,
+        "y": 1
+      },
+      "id": 12,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "mean"
+          ],
+          "fields": "",
+          "values": false
+        }
+      },
+      "pluginVersion": "7.0.3",
+      "targets": [
+        {
+          "expr": "time() - container_start_time_seconds{pod=~\"testing-grafana.*\",container=\"grafana\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Grafana",
+      "type": "stat"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "custom": {},
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "red",
+                "value": null
+              },
+              {
+                "color": "green",
+                "value": 0
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 16,
+        "y": 1
+      },
+      "id": 13,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "mean"
+          ],
+          "fields": "",
+          "values": false
+        }
+      },
+      "pluginVersion": "7.0.3",
+      "targets": [
+        {
+          "expr": "time() - container_start_time_seconds{pod=~\"testing-timescaledb.*\",container=\"timescaledb\"}",
+          "interval": "",
+          "legendFormat": "",
+          "refId": "A"
+        }
+      ],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "TimeScaleDB",
+      "type": "stat"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 25,
+  "style": "dark",
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now",
+    "to": "now"
+  },
+  "timepicker": {
+    "refresh_intervals": [
+      "10s",
+      "30s",
+      "1m",
+      "5m",
+      "15m",
+      "30m",
+      "1h",
+      "2h",
+      "1d"
+    ]
+  },
+  "timezone": "",
+  "title": "Testing Metrics",
+  "uid": "FFVaNrVGz",
+  "version": 12
+}
\ No newline at end of file