Skip to content
This repository has been archived by the owner on Apr 22, 2020. It is now read-only.

Support metrics sampling #107

Merged
merged 2 commits into from
Dec 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package de.zalando.zmon.dataservice.config;

import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.NestedConfigurationProperty;
import org.springframework.context.annotation.Configuration;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.NestedConfigurationProperty;
import org.springframework.context.annotation.Configuration;

@Configuration
@ConfigurationProperties(prefix = "dataservice")
public class DataServiceConfigProperties {
Expand Down Expand Up @@ -90,7 +90,7 @@ public void setwhiteListedChecks(List<Integer> whiteListedChecks) {
@NestedConfigurationProperty
private Map<String, AsyncExecutorProperties> asyncExecutors = new HashMap<>();

private int resultSizeWarning = 100;
private int resultSizeWarning = 2000;
private long connectionsTimeToLive = 2 * 60 * 1000;

private Map<String, Object> versionConfig = null;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
package de.zalando.zmon.dataservice.data;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import de.zalando.zmon.dataservice.config.DataServiceConfigProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;

import de.zalando.zmon.dataservice.config.DataServiceConfigProperties;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

@Component
class ApplicationMetricsWriter implements WorkResultWriter {
Expand All @@ -36,7 +35,7 @@ public void write(WriteData writeData) {
if (writeData.getWorkerResultOptional().isPresent() && config.getRestMetricHosts() != null && config.getRestMetricHosts().size() > 0) {
try {
Map<Integer, List<CheckData>> partitions = writeData.getWorkerResultOptional().get().results.stream()
.filter(x -> config.getActuatorMetricChecks().contains(x.check_id)).filter(x -> !x.exception)
.filter(x -> config.getActuatorMetricChecks().contains(x.checkId)).filter(x -> !x.exception)
.collect(Collectors.groupingBy(x -> Math
.abs(x.entity.get("application_id").hashCode() % config.getRestMetricHosts().size())));

Expand Down
19 changes: 13 additions & 6 deletions src/main/java/de/zalando/zmon/dataservice/data/CheckData.java
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
package de.zalando.zmon.dataservice.data;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.JsonNode;

import java.util.HashMap;
import java.util.Map;

import com.fasterxml.jackson.databind.JsonNode;

/**
* Plain data holder for one check result; the stores in this package iterate over
* {@code WorkerResult.results} as a collection of {@code CheckData}.
* <p>
* All fields are public. The camelCase fields carry {@code @JsonProperty} annotations that bind
* them to snake_case JSON property names.
* <p>
* NOTE(review): this listing looks like a diff rendering in which removed and added lines are
* interleaved. The un-annotated snake_case fields ({@code check_id}, {@code entity_id},
* {@code run_time}, {@code check_result}) appear to be the pre-change declarations of the
* annotated camelCase fields that follow them - confirm against the actual file before relying
* on either set.
* <p>
* Created by jmussler on 4/22/15.
*/
public class CheckData {
// Not read anywhere in the visible code; presumably the time reported with the result - TODO confirm.
public String time;
// Not read anywhere in the visible code; presumably the reporting worker's name - TODO confirm.
public String worker;
// NOTE(review): appears to be the old declaration superseded by checkId below - confirm.
public int check_id;
// NOTE(review): appears to be the old declaration superseded by entityId below - confirm.
public String entity_id;
// Id of the check; used to build the "zmon.check.<id>" time series name and the "zmon:checks:<id>" Redis keys.
@JsonProperty("check_id")
public int checkId;
// Id of the entity the check ran against; results are filtered by whether it contains the account id.
@JsonProperty("entity_id")
public String entityId;
// String properties of the entity; "application_id" is read from it when partitioning application metrics.
public Map<String,String> entity = new HashMap<>();
// NOTE(review): appears to be the old declaration superseded by runTime below - confirm.
public double run_time;
// NOTE(review): appears to be the old declaration superseded by checkResult below - confirm.
public JsonNode check_result;
// Not read anywhere in the visible code; presumably the duration of the check run - unit unknown, TODO confirm.
@JsonProperty("run_time")
public double runTime;
// Raw result JSON; the stores read its "ts", "td", "worker" and "value" members.
@JsonProperty("check_result")
public JsonNode checkResult;
// True when the result represents an exception; such results are excluded from application metrics.
public boolean exception;
// Alert evaluations attached to this result; defaults to an empty map, but readers still null-check it.
public Map<String, AlertData> alerts = new HashMap<>(0);
// Defaults to true; KairosDBStore.store skips results where this is false.
@JsonProperty("is_sampled")
public boolean isSampled = true;
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package de.zalando.zmon.dataservice.data;

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import de.zalando.zmon.dataservice.ApplianceVersionService;
import de.zalando.zmon.dataservice.DataServiceMetrics;
import de.zalando.zmon.dataservice.components.CustomObjectMapper;
import de.zalando.zmon.dataservice.components.DefaultObjectMapper;
import de.zalando.zmon.dataservice.config.DataServiceConfigProperties;
import de.zalando.zmon.dataservice.oauth2.BearerToken;
import org.slf4j.Logger;
Expand All @@ -14,20 +15,11 @@
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestHeader;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.bind.annotation.*;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import de.zalando.zmon.dataservice.DataServiceMetrics;
import de.zalando.zmon.dataservice.components.CustomObjectMapper;
import de.zalando.zmon.dataservice.components.DefaultObjectMapper;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

@RestController
@RequestMapping("/api")
Expand Down Expand Up @@ -122,7 +114,7 @@ protected Optional<WorkerResult> extractAndFilter(String data, String accountId,
// that
// execute the wrong checks
wrOptional.get().results = wrOptional.get().results.stream()
.filter(x -> x.entity_id.contains(accountId)).collect(Collectors.toList());
.filter(x -> x.entityId.contains(accountId)).collect(Collectors.toList());
}
}
return wrOptional;
Expand Down
25 changes: 9 additions & 16 deletions src/main/java/de/zalando/zmon/dataservice/data/KairosDBStore.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ public void fillFlatValueMap(Map<String, NumericNode> values, String prefix, Jso

private final DataServiceMetrics metrics;
private final int resultSizeWarning;
private WhitelistedChecks whitelist;

private static class DataPoint {
public String name;
Expand All @@ -76,13 +75,11 @@ private static class DataPoint {
}

@Autowired
public KairosDBStore(DataServiceConfigProperties config, DataServiceMetrics metrics, DataPointsQueryStore dataPointsQueryStore,
WhitelistedChecks whitelist) {
public KairosDBStore(DataServiceConfigProperties config, DataServiceMetrics metrics, DataPointsQueryStore dataPointsQueryStore) {
this.metrics = metrics;
this.config = config;
this.dataPointsQueryStore = dataPointsQueryStore;
this.resultSizeWarning = config.getResultSizeWarning();
this.whitelist = whitelist;

if (null == config.getKairosdbTagFields() || config.getKairosdbTagFields().size() == 0) {
this.entityTagFields = DEFAULT_ENTITY_TAG_FIELDS;
Expand Down Expand Up @@ -140,33 +137,29 @@ void store(WorkerResult wr) {
List<DataPoint> points = new LinkedList<>();
for (CheckData cd : wr.results) {

//Get whitelist from dynamic entity reader
List<Integer> whiteListedChecks = whitelist.getWhitelist();
//Only ingest whitelisted checks
//if (! config.getwhiteListedChecks().contains(cd.check_id)){
if (!whiteListedChecks.contains(cd.check_id)) {
LOG.warn("Dropping non critical checkid={} ", cd.check_id);
if (!cd.isSampled) {
LOG.debug("Dropping non-sampled metrics for checkid={}", cd.checkId);
continue;
}

final Map<String, NumericNode> values = new HashMap<>();
final String timeSeries = "zmon.check." + cd.check_id;
final String timeSeries = "zmon.check." + cd.checkId;

Double ts = cd.check_result.get("ts").asDouble();
Double ts = cd.checkResult.get("ts").asDouble();
ts = ts * 1000.;
Long tsL = ts.longValue();

fillFlatValueMap(values, "", cd.check_result.get("value"));
fillFlatValueMap(values, "", cd.checkResult.get("value"));

for (Map.Entry<String, NumericNode> e : values.entrySet()) {
DataPoint p = new DataPoint();
p.name = timeSeries;

p.tags.putAll(getTags(e.getKey(), cd.entity_id, cd.entity));
p.tags.putAll(getTags(e.getKey(), cd.entityId, cd.entity));

// handle zmon actuator metrics and extract the http status code into its own field
// put the first character of the status code into "status group" sg, this is only for easy kairosdb query
if (config.getActuatorMetricChecks().contains(cd.check_id)) {
if (config.getActuatorMetricChecks().contains(cd.checkId)) {
final String[] keyParts = e.getKey().split("\\.");

if (keyParts.length >= 3 && "health".equals(keyParts[0]) && "200".equals(keyParts[2])) {
Expand Down Expand Up @@ -202,7 +195,7 @@ void store(WorkerResult wr) {
}

if (points.size() > resultSizeWarning) {
LOG.warn("result size warning: check={} data-points={} entity={}", cd.check_id, points.size(), cd.entity_id);
LOG.warn("result size warning: check={} data-points={} entity={}", cd.checkId, points.size(), cd.entityId);
}
}

Expand Down
28 changes: 14 additions & 14 deletions src/main/java/de/zalando/zmon/dataservice/data/RedisDataStore.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,34 +80,34 @@ public void store(WorkerResult wr) {
Pipeline p = jedis.pipelined();

for (CheckData cd : wr.results) {
p.sadd("zmon:checks", "" + cd.check_id);
p.sadd("zmon:checks:" + cd.check_id, cd.entity_id);
String checkTs = "zmon:checks:" + cd.check_id + ":" + cd.entity_id;
p.sadd("zmon:checks", "" + cd.checkId);
p.sadd("zmon:checks:" + cd.checkId, cd.entityId);
String checkTs = "zmon:checks:" + cd.checkId + ":" + cd.entityId;

String checkValue = writeValueAsString(cd.check_result).orElse(EMPTY_CHECK);
String checkValue = writeValueAsString(cd.checkResult).orElse(EMPTY_CHECK);
p.lpush(checkTs, checkValue);
p.ltrim(checkTs, 0, 2);

if (null != cd.alerts) {
for (AlertData alert : cd.alerts.values()) {

createEvents(cd.entity_id, cd.check_id, checkValue, alert);
createEvents(cd.entityId, cd.checkId, checkValue, alert);

if (alert.active && alert.in_period) {
p.sadd("zmon:alerts:" + alert.alert_id, cd.entity_id);
p.sadd("zmon:alerts:" + alert.alert_id, cd.entityId);

String value = buildValue(alert, cd);

p.set("zmon:alerts:" + alert.alert_id + ":" + cd.entity_id, value);
p.set("zmon:alerts:" + alert.alert_id + ":" + cd.entityId, value);

} else {
p.srem("zmon:alerts:" + alert.alert_id, cd.entity_id);
p.del("zmon:alerts:" + alert.alert_id + ":" + cd.entity_id);
p.srem("zmon:alerts:" + alert.alert_id, cd.entityId);
p.del("zmon:alerts:" + alert.alert_id + ":" + cd.entityId);
}

String captures = writeValueAsString(alert.captures).orElse(CAPTURES_NOT_SERIALIZED);

p.hset("zmon:alerts:" + alert.alert_id + ":entities", cd.entity_id, captures);
p.hset("zmon:alerts:" + alert.alert_id + ":entities", cd.entityId, captures);

p.eval("if redis.call('scard','zmon:alerts:" + alert.alert_id + "') == 0 then " +
"redis.call('srem','zmon:alert-acks', " + alert.alert_id + "); " +
Expand Down Expand Up @@ -151,15 +151,15 @@ protected String buildValue(AlertData alert, CheckData cd) {

vNode.put("start_time", alertStart);

vNode.set("ts", cd.check_result.get("ts"));
vNode.set("td", cd.check_result.get("td"));
vNode.set("worker", cd.check_result.get("worker"));
vNode.set("ts", cd.checkResult.get("ts"));
vNode.set("td", cd.checkResult.get("td"));
vNode.set("worker", cd.checkResult.get("worker"));

if (cd.exception) {
vNode.put("exc", 1);
}

vNode.putPOJO("value", cd.check_result.get("value"));
vNode.putPOJO("value", cd.checkResult.get("value"));

try {
value = mapper.writeValueAsString(vNode);
Expand Down

This file was deleted.

Loading