Skip to content

Commit

Permalink
Add CPU time monitor using cgroups (#30)
Browse files Browse the repository at this point in the history
* Add ability to get per CPU usage stats.

* Update copyright and minor formatting

* Merge Java8 branch to feature branch (#32)

* Update to java 8

* Remove java7 from travis yaml

* Address code review comments

* Address comments

* Whoops! undo proc.mounts edit

* Fix `getProc` refactor craziness

* Address more comments

* Address a few more comments

* Address more comments

* Name change input-->lines

* Address comments from leventov

* Remove Throwables

* Replace some `_` with camelCase

* Fix bad name refactor

* Soft failures on monitoring
  • Loading branch information
drcrallen authored Jul 24, 2017
1 parent 4dea8dc commit d6ab112
Show file tree
Hide file tree
Showing 20 changed files with 1,664 additions and 38 deletions.
28 changes: 28 additions & 0 deletions src/main/java/com/metamx/metrics/CgroupUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright 2017 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.metamx.metrics;

import com.metamx.common.StringUtils;
import java.lang.management.ManagementFactory;
import java.util.regex.Pattern;

public class CgroupUtil
{
public static final String SPACE_MATCH = Pattern.quote(" ");
public static final String COMMA_MATCH = Pattern.quote(",");
public static final String COLON_MATCH = Pattern.quote(":");
}
134 changes: 134 additions & 0 deletions src/main/java/com/metamx/metrics/CpuAcctDeltaMonitor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Copyright 2017 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.metamx.metrics;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.metamx.common.logger.Logger;
import com.metamx.emitter.service.ServiceEmitter;
import com.metamx.emitter.service.ServiceMetricEvent;
import com.metamx.metrics.cgroups.CgroupDiscoverer;
import com.metamx.metrics.cgroups.CpuAcct;
import com.metamx.metrics.cgroups.JvmPidDiscoverer;
import com.metamx.metrics.cgroups.PidDiscoverer;
import com.metamx.metrics.cgroups.ProcCgroupDiscoverer;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;
import org.joda.time.DateTime;

public class CpuAcctDeltaMonitor extends FeedDefiningMonitor
{
private static final Logger log = new Logger(CpuAcctDeltaMonitor.class);
private final AtomicReference<SnapshotHolder> priorSnapshot = new AtomicReference<>(null);
private final Map<String, String[]> dimensions;

private final PidDiscoverer pidDiscoverer;
private final CgroupDiscoverer cgroupDiscoverer;

public CpuAcctDeltaMonitor()
{
this(ImmutableMap.of());
}

public CpuAcctDeltaMonitor(final Map<String, String[]> dimensions)
{
this(dimensions, DEFAULT_METRICS_FEED);
}

public CpuAcctDeltaMonitor(final Map<String, String[]> dimensions, final String feed)
{
this(feed, dimensions, new JvmPidDiscoverer(), new ProcCgroupDiscoverer());
}

public CpuAcctDeltaMonitor(
String feed,
Map<String, String[]> dimensions,
PidDiscoverer pidDiscoverer,
CgroupDiscoverer cgroupDiscoverer
)
{
super(feed);
Preconditions.checkNotNull(dimensions);
this.dimensions = ImmutableMap.copyOf(dimensions);
this.pidDiscoverer = Preconditions.checkNotNull(pidDiscoverer, "pidDiscoverer required");
this.cgroupDiscoverer = Preconditions.checkNotNull(cgroupDiscoverer, "cgroupDiscoverer required");
}

@Override
public boolean doMonitor(ServiceEmitter emitter)
{
final CpuAcct cpuAcct = new CpuAcct(cgroupDiscoverer, pidDiscoverer);
final CpuAcct.CpuAcctMetric snapshot = cpuAcct.snapshot();
final long nanoTime = System.nanoTime(); // Approx time... may be influenced by an unlucky GC
final DateTime dateTime = new DateTime();
final SnapshotHolder priorSnapshotHolder = this.priorSnapshot.get();
if (!priorSnapshot.compareAndSet(priorSnapshotHolder, new SnapshotHolder(snapshot, nanoTime))) {
log.debug("Pre-empted by another monitor run");
return false;
}
if (priorSnapshotHolder == null) {
log.info("Detected first run, storing result for next run");
return false;
}
final long elapsedNs = nanoTime - priorSnapshotHolder.timestamp;
if (snapshot.cpuCount() != priorSnapshotHolder.metric.cpuCount()) {
log.warn(
"Prior CPU count [%d] does not match current cpu count [%d]. Skipping metrics emission",
priorSnapshotHolder.metric.cpuCount(),
snapshot.cpuCount()
);
return false;
}
for (int i = 0; i < snapshot.cpuCount(); ++i) {
final ServiceMetricEvent.Builder builderUsr = builder()
.setDimension("cpuName", Integer.toString(i))
.setDimension("cpuTime", "usr");
final ServiceMetricEvent.Builder builderSys = builder()
.setDimension("cpuName", Integer.toString(i))
.setDimension("cpuTime", "sys");
MonitorUtils.addDimensionsToBuilder(builderUsr, dimensions);
MonitorUtils.addDimensionsToBuilder(builderSys, dimensions);
emitter.emit(builderUsr.build(
dateTime,
"cgroup/cpu_time_delta_ns",
snapshot.usrTime(i) - priorSnapshotHolder.metric.usrTime(i)
));
emitter.emit(builderSys.build(
dateTime,
"cgroup/cpu_time_delta_ns",
snapshot.sysTime(i) - priorSnapshotHolder.metric.sysTime(i)
));
}
if (snapshot.cpuCount() > 0) {
// Don't bother emitting metrics if there aren't actually any cpus (usually from error)
emitter.emit(builder().build(dateTime, "cgroup/cpu_time_delta_ns_elapsed", elapsedNs));
}
return true;
}

static class SnapshotHolder
{
private final CpuAcct.CpuAcctMetric metric;
private final long timestamp;

SnapshotHolder(CpuAcct.CpuAcctMetric metric, long timestamp)
{
this.metric = metric;
this.timestamp = timestamp;
}
}
}
24 changes: 24 additions & 0 deletions src/main/java/com/metamx/metrics/cgroups/CgroupDiscoverer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright 2017 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.metamx.metrics.cgroups;

import java.nio.file.Path;

public interface CgroupDiscoverer
{
Path discover(String cgroup, long pid);
}
156 changes: 156 additions & 0 deletions src/main/java/com/metamx/metrics/cgroups/CpuAcct.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Copyright 2017 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.metamx.metrics.cgroups;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.metamx.common.RE;
import com.metamx.common.logger.Logger;
import com.metamx.metrics.CgroupUtil;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.List;
import java.util.stream.LongStream;

public class CpuAcct
{
private static final Logger LOG = new Logger(CpuAcct.class);
private static final String CGROUP = "cpuacct";
private static final String CGROUP_ACCT_FILE = "cpuacct.usage_all";

// Private because it requires a specific format and cant' take a generic list of strings
private static CpuAcctMetric parse(final List<String> lines)
{
// File has a header. We skip it
// See src/test/resources/cpuacct.usage_all for an example
final int ncpus = lines.size() - 1;
final long[] usrTime = new long[ncpus];
final long[] sysTime = new long[ncpus];
for (int i = 1; i < lines.size(); i++) {
final String[] splits = lines.get(i).split(CgroupUtil.SPACE_MATCH, 3);
if (splits.length != 3) {
throw new RE("Error parsing [%s]", lines.get(i));
}
final int cpuNum = Integer.parseInt(splits[0]);
usrTime[cpuNum] = Long.parseLong(splits[1]);
sysTime[cpuNum] = Long.parseLong(splits[2]);
}
return new CpuAcctMetric(usrTime, sysTime);
}

private final CgroupDiscoverer cgroupDiscoverer;
private final PidDiscoverer pidDiscoverer;

public CpuAcct(CgroupDiscoverer cgroupDiscoverer, PidDiscoverer pidDiscoverer)
{
this.cgroupDiscoverer = cgroupDiscoverer;
this.pidDiscoverer = pidDiscoverer;
}

/**
* Take a snapshot of the existing data.
*
* @return A snapshot with the data populated or a snapshot with zero-length arrays for data.
*/
public CpuAcctMetric snapshot()
{
final File cpuacct;
try {
cpuacct = new File(
cgroupDiscoverer.discover(CGROUP, pidDiscoverer.getPid()).toFile(),
CGROUP_ACCT_FILE
);
}
catch (RuntimeException re) {
LOG.error(re, "Unable to fetch snapshot");
return new CpuAcctMetric(new long[0], new long[0]);
}
try {
return parse(Files.readAllLines(cpuacct.toPath(), Charsets.UTF_8));
}
catch (IOException e) {
throw new RuntimeException(e);
}
}

public static class CpuAcctMetric
{
private final long[] usrTimes;
private final long[] sysTimes;

CpuAcctMetric(long[] usrTimes, long[] sysTimes)
{
Preconditions.checkArgument(usrTimes.length == sysTimes.length, "Lengths must match");
this.usrTimes = usrTimes;
this.sysTimes = sysTimes;
}

public final int cpuCount()
{
return usrTimes.length;
}

public final long[] sysTimes()
{
return sysTimes;
}

public final long[] usrTimes()
{
return usrTimes;
}

public final long usrTime(int cpuNum)
{
return usrTimes[cpuNum];
}

public final long sysTime(int cpu_Num)
{
return sysTimes[cpu_Num];
}

public final long usrTime()
{
return LongStream.of(usrTimes).sum();
}

public final long sysTime()
{
return LongStream.of(sysTimes).sum();
}

public final long time()
{
return usrTime() + sysTime();
}

public final CpuAcctMetric cumulativeSince(CpuAcctMetric other)
{
final int cpuCount = cpuCount();
Preconditions.checkArgument(cpuCount == other.cpuCount(), "Cpu count missmatch");
final long[] sysTimes = new long[cpuCount];
final long[] usrTimes = new long[cpuCount];
for (int i = 0; i < cpuCount; i++) {
sysTimes[i] = this.sysTimes[i] - other.sysTimes[i];
usrTimes[i] = this.usrTimes[i] - other.usrTimes[i];
}
return new CpuAcctMetric(usrTimes, sysTimes);
}
}
}
Loading

0 comments on commit d6ab112

Please sign in to comment.