-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CPU time monitor using cgroups (#30)
* Add ability to get per CPU usage stats. * Update copyright and minor formatting * Merge Java8 branch to feature branch (#32) * Update to java 8 * Remove java7 from travis yaml * Address code review comments * Address comments * Whoops! undo proc.mounts edit * Fix `getProc` refactor craziness * Address more comments * Address a few more comments * Address more comments * Name change input-->lines * Address comments from leventov * Remove Throwables * Replace some `_` with camelCase * Fix bad name refactor * Soft failures on monitoring
- Loading branch information
Showing
20 changed files
with
1,664 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/* | ||
* Copyright 2017 Metamarkets Group Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.metamx.metrics; | ||
|
||
import com.metamx.common.StringUtils; | ||
import java.lang.management.ManagementFactory; | ||
import java.util.regex.Pattern; | ||
|
||
public class CgroupUtil | ||
{ | ||
public static final String SPACE_MATCH = Pattern.quote(" "); | ||
public static final String COMMA_MATCH = Pattern.quote(","); | ||
public static final String COLON_MATCH = Pattern.quote(":"); | ||
} |
134 changes: 134 additions & 0 deletions
134
src/main/java/com/metamx/metrics/CpuAcctDeltaMonitor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/* | ||
* Copyright 2017 Metamarkets Group Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.metamx.metrics; | ||
|
||
import com.google.common.base.Preconditions; | ||
import com.google.common.collect.ImmutableMap; | ||
import com.metamx.common.logger.Logger; | ||
import com.metamx.emitter.service.ServiceEmitter; | ||
import com.metamx.emitter.service.ServiceMetricEvent; | ||
import com.metamx.metrics.cgroups.CgroupDiscoverer; | ||
import com.metamx.metrics.cgroups.CpuAcct; | ||
import com.metamx.metrics.cgroups.JvmPidDiscoverer; | ||
import com.metamx.metrics.cgroups.PidDiscoverer; | ||
import com.metamx.metrics.cgroups.ProcCgroupDiscoverer; | ||
import java.util.Map; | ||
import java.util.concurrent.atomic.AtomicReference; | ||
import org.joda.time.DateTime; | ||
|
||
public class CpuAcctDeltaMonitor extends FeedDefiningMonitor | ||
{ | ||
private static final Logger log = new Logger(CpuAcctDeltaMonitor.class); | ||
private final AtomicReference<SnapshotHolder> priorSnapshot = new AtomicReference<>(null); | ||
private final Map<String, String[]> dimensions; | ||
|
||
private final PidDiscoverer pidDiscoverer; | ||
private final CgroupDiscoverer cgroupDiscoverer; | ||
|
||
public CpuAcctDeltaMonitor() | ||
{ | ||
this(ImmutableMap.of()); | ||
} | ||
|
||
public CpuAcctDeltaMonitor(final Map<String, String[]> dimensions) | ||
{ | ||
this(dimensions, DEFAULT_METRICS_FEED); | ||
} | ||
|
||
public CpuAcctDeltaMonitor(final Map<String, String[]> dimensions, final String feed) | ||
{ | ||
this(feed, dimensions, new JvmPidDiscoverer(), new ProcCgroupDiscoverer()); | ||
} | ||
|
||
public CpuAcctDeltaMonitor( | ||
String feed, | ||
Map<String, String[]> dimensions, | ||
PidDiscoverer pidDiscoverer, | ||
CgroupDiscoverer cgroupDiscoverer | ||
) | ||
{ | ||
super(feed); | ||
Preconditions.checkNotNull(dimensions); | ||
this.dimensions = ImmutableMap.copyOf(dimensions); | ||
this.pidDiscoverer = Preconditions.checkNotNull(pidDiscoverer, "pidDiscoverer required"); | ||
this.cgroupDiscoverer = Preconditions.checkNotNull(cgroupDiscoverer, "cgroupDiscoverer required"); | ||
} | ||
|
||
@Override | ||
public boolean doMonitor(ServiceEmitter emitter) | ||
{ | ||
final CpuAcct cpuAcct = new CpuAcct(cgroupDiscoverer, pidDiscoverer); | ||
final CpuAcct.CpuAcctMetric snapshot = cpuAcct.snapshot(); | ||
final long nanoTime = System.nanoTime(); // Approx time... may be influenced by an unlucky GC | ||
final DateTime dateTime = new DateTime(); | ||
final SnapshotHolder priorSnapshotHolder = this.priorSnapshot.get(); | ||
if (!priorSnapshot.compareAndSet(priorSnapshotHolder, new SnapshotHolder(snapshot, nanoTime))) { | ||
log.debug("Pre-empted by another monitor run"); | ||
return false; | ||
} | ||
if (priorSnapshotHolder == null) { | ||
log.info("Detected first run, storing result for next run"); | ||
return false; | ||
} | ||
final long elapsedNs = nanoTime - priorSnapshotHolder.timestamp; | ||
if (snapshot.cpuCount() != priorSnapshotHolder.metric.cpuCount()) { | ||
log.warn( | ||
"Prior CPU count [%d] does not match current cpu count [%d]. Skipping metrics emission", | ||
priorSnapshotHolder.metric.cpuCount(), | ||
snapshot.cpuCount() | ||
); | ||
return false; | ||
} | ||
for (int i = 0; i < snapshot.cpuCount(); ++i) { | ||
final ServiceMetricEvent.Builder builderUsr = builder() | ||
.setDimension("cpuName", Integer.toString(i)) | ||
.setDimension("cpuTime", "usr"); | ||
final ServiceMetricEvent.Builder builderSys = builder() | ||
.setDimension("cpuName", Integer.toString(i)) | ||
.setDimension("cpuTime", "sys"); | ||
MonitorUtils.addDimensionsToBuilder(builderUsr, dimensions); | ||
MonitorUtils.addDimensionsToBuilder(builderSys, dimensions); | ||
emitter.emit(builderUsr.build( | ||
dateTime, | ||
"cgroup/cpu_time_delta_ns", | ||
snapshot.usrTime(i) - priorSnapshotHolder.metric.usrTime(i) | ||
)); | ||
emitter.emit(builderSys.build( | ||
dateTime, | ||
"cgroup/cpu_time_delta_ns", | ||
snapshot.sysTime(i) - priorSnapshotHolder.metric.sysTime(i) | ||
)); | ||
} | ||
if (snapshot.cpuCount() > 0) { | ||
// Don't bother emitting metrics if there aren't actually any cpus (usually from error) | ||
emitter.emit(builder().build(dateTime, "cgroup/cpu_time_delta_ns_elapsed", elapsedNs)); | ||
} | ||
return true; | ||
} | ||
|
||
static class SnapshotHolder | ||
{ | ||
private final CpuAcct.CpuAcctMetric metric; | ||
private final long timestamp; | ||
|
||
SnapshotHolder(CpuAcct.CpuAcctMetric metric, long timestamp) | ||
{ | ||
this.metric = metric; | ||
this.timestamp = timestamp; | ||
} | ||
} | ||
} |
24 changes: 24 additions & 0 deletions
24
src/main/java/com/metamx/metrics/cgroups/CgroupDiscoverer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/* | ||
* Copyright 2017 Metamarkets Group Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.metamx.metrics.cgroups; | ||
|
||
import java.nio.file.Path; | ||
|
||
public interface CgroupDiscoverer | ||
{ | ||
Path discover(String cgroup, long pid); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
/* | ||
* Copyright 2017 Metamarkets Group Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.metamx.metrics.cgroups; | ||
|
||
import com.google.common.base.Charsets; | ||
import com.google.common.base.Preconditions; | ||
import com.metamx.common.RE; | ||
import com.metamx.common.logger.Logger; | ||
import com.metamx.metrics.CgroupUtil; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.util.List; | ||
import java.util.stream.LongStream; | ||
|
||
public class CpuAcct | ||
{ | ||
private static final Logger LOG = new Logger(CpuAcct.class); | ||
private static final String CGROUP = "cpuacct"; | ||
private static final String CGROUP_ACCT_FILE = "cpuacct.usage_all"; | ||
|
||
// Private because it requires a specific format and cant' take a generic list of strings | ||
private static CpuAcctMetric parse(final List<String> lines) | ||
{ | ||
// File has a header. We skip it | ||
// See src/test/resources/cpuacct.usage_all for an example | ||
final int ncpus = lines.size() - 1; | ||
final long[] usrTime = new long[ncpus]; | ||
final long[] sysTime = new long[ncpus]; | ||
for (int i = 1; i < lines.size(); i++) { | ||
final String[] splits = lines.get(i).split(CgroupUtil.SPACE_MATCH, 3); | ||
if (splits.length != 3) { | ||
throw new RE("Error parsing [%s]", lines.get(i)); | ||
} | ||
final int cpuNum = Integer.parseInt(splits[0]); | ||
usrTime[cpuNum] = Long.parseLong(splits[1]); | ||
sysTime[cpuNum] = Long.parseLong(splits[2]); | ||
} | ||
return new CpuAcctMetric(usrTime, sysTime); | ||
} | ||
|
||
private final CgroupDiscoverer cgroupDiscoverer; | ||
private final PidDiscoverer pidDiscoverer; | ||
|
||
public CpuAcct(CgroupDiscoverer cgroupDiscoverer, PidDiscoverer pidDiscoverer) | ||
{ | ||
this.cgroupDiscoverer = cgroupDiscoverer; | ||
this.pidDiscoverer = pidDiscoverer; | ||
} | ||
|
||
/** | ||
* Take a snapshot of the existing data. | ||
* | ||
* @return A snapshot with the data populated or a snapshot with zero-length arrays for data. | ||
*/ | ||
public CpuAcctMetric snapshot() | ||
{ | ||
final File cpuacct; | ||
try { | ||
cpuacct = new File( | ||
cgroupDiscoverer.discover(CGROUP, pidDiscoverer.getPid()).toFile(), | ||
CGROUP_ACCT_FILE | ||
); | ||
} | ||
catch (RuntimeException re) { | ||
LOG.error(re, "Unable to fetch snapshot"); | ||
return new CpuAcctMetric(new long[0], new long[0]); | ||
} | ||
try { | ||
return parse(Files.readAllLines(cpuacct.toPath(), Charsets.UTF_8)); | ||
} | ||
catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
public static class CpuAcctMetric | ||
{ | ||
private final long[] usrTimes; | ||
private final long[] sysTimes; | ||
|
||
CpuAcctMetric(long[] usrTimes, long[] sysTimes) | ||
{ | ||
Preconditions.checkArgument(usrTimes.length == sysTimes.length, "Lengths must match"); | ||
this.usrTimes = usrTimes; | ||
this.sysTimes = sysTimes; | ||
} | ||
|
||
public final int cpuCount() | ||
{ | ||
return usrTimes.length; | ||
} | ||
|
||
public final long[] sysTimes() | ||
{ | ||
return sysTimes; | ||
} | ||
|
||
public final long[] usrTimes() | ||
{ | ||
return usrTimes; | ||
} | ||
|
||
public final long usrTime(int cpuNum) | ||
{ | ||
return usrTimes[cpuNum]; | ||
} | ||
|
||
public final long sysTime(int cpu_Num) | ||
{ | ||
return sysTimes[cpu_Num]; | ||
} | ||
|
||
public final long usrTime() | ||
{ | ||
return LongStream.of(usrTimes).sum(); | ||
} | ||
|
||
public final long sysTime() | ||
{ | ||
return LongStream.of(sysTimes).sum(); | ||
} | ||
|
||
public final long time() | ||
{ | ||
return usrTime() + sysTime(); | ||
} | ||
|
||
public final CpuAcctMetric cumulativeSince(CpuAcctMetric other) | ||
{ | ||
final int cpuCount = cpuCount(); | ||
Preconditions.checkArgument(cpuCount == other.cpuCount(), "Cpu count missmatch"); | ||
final long[] sysTimes = new long[cpuCount]; | ||
final long[] usrTimes = new long[cpuCount]; | ||
for (int i = 0; i < cpuCount; i++) { | ||
sysTimes[i] = this.sysTimes[i] - other.sysTimes[i]; | ||
usrTimes[i] = this.usrTimes[i] - other.usrTimes[i]; | ||
} | ||
return new CpuAcctMetric(usrTimes, sysTimes); | ||
} | ||
} | ||
} |
Oops, something went wrong.