Skip to content

Commit

Permalink
Code to periodically track IProfiler overhead
Browse files Browse the repository at this point in the history
Every 5 seconds we will print the number of profiling entries,
the CPU taken by the IProfiler thread and the top 5 methods
that received profiling samples.
The code is not active, to minimize overhead. In order to
compile it in one needs to define `TRACK_SAMPLE_SOURCES`
Also, to activate it at runtime, one needs to define the
following env var: `TR_TrackIProfilerActivity`
The information appears in the verbose log if one is defined
or to stderr if there is no vlog.

Signed-off-by: Marius Pirvu <[email protected]>
  • Loading branch information
mpirvu committed Jan 11, 2025
1 parent 3d7fea2 commit 15b5abe
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 9 deletions.
125 changes: 118 additions & 7 deletions runtime/compiler/runtime/IProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,12 @@ TR_IProfiler::TR_IProfiler(J9JITConfig *jitConfig)
{
_isIProfilingEnabled = false;
}
#if defined(TRACK_SAMPLE_SOURCES)
if (_isIProfilingEnabled)
{
_trackProfilingActivity = feGetEnv("TR_TrackIProfilerActivity") != NULL;
}
#endif
}
}

Expand Down Expand Up @@ -3982,14 +3988,16 @@ TR_IProfiler::postIprofilingBufferToWorkingQueue(J9VMThread * vmThread, const U_
// Method executed by the java thread when jitHookBytecodeProfiling() is called
bool TR_IProfiler::processProfilingBuffer(J9VMThread *vmThread, const U_8* dataStart, UDATA size)
{
if (_numOutstandingBuffers >= TR::Options::_iprofilerNumOutstandingBuffers ||
if ((_numOutstandingBuffers >= TR::Options::_iprofilerNumOutstandingBuffers ||
_compInfo->getPersistentInfo()->getLoadFactor() >= 1) // More active threads than CPUs
#if defined(TRACK_SAMPLE_SOURCES)
&& !_trackProfilingActivity // If tracking is enabled we want to parse the buffer on the dedicated IProfiler thread
#endif
)
{
if (100*_numRequestsSkipped >= (uint64_t)TR::Options::_iprofilerBufferMaxPercentageToDiscard * _numRequests)
{
// too many skipped requests; let the java thread handle this one
//records = parseBuffer(vmThread, cursor, size, iProfiler);
//setProfilingBufferCursor(vmThread, (U_8*)cursor);
return false; // delegate the processing to the java thread
}
else // skip this request altogether
Expand All @@ -4004,8 +4012,6 @@ bool TR_IProfiler::processProfilingBuffer(J9VMThread *vmThread, const U_8* dataS
{
// If posting fails, we should let the app thread process the buffer
return false;
//_numRequestsSkipped++;
//setProfilingBufferCursor(vmThread, (U_8*)dataStart);
}
}
return true;
Expand Down Expand Up @@ -4144,6 +4150,55 @@ static inline uint16_t readU16(U_8 *_offset)
static inline uint16_t readU16(U_8 *_offset) { return (*(U_16*)(_offset)); }
#endif /* !FIXUP_UNALIGNED */

#if defined(TRACK_SAMPLE_SOURCES)
void
TR_IProfiler::TopFiveMethods::processElement(const std::pair<const J9ROMMethod*, uint32_t> & newElem)
{
if (_sortedList.size() == 5)
{
if (_sortedList.back().second < newElem.second)
{
// Take the last element out (smallest) to make room for the new one
_sortedList.pop_back();
}
}
if (_sortedList.size() < 5)
{
// Insert new element
bool inserted = false;
for (auto it = _sortedList.begin(); it != _sortedList.end(); ++it)
{
if (it->second < newElem.second) // compare number of samples
{
_sortedList.insert(it, newElem);
inserted = true;
break;
}
} // end for
if (!inserted)
{
_sortedList.push_back(newElem);
}
}
} // processElement

void
TR_IProfiler::TopFiveMethods::print(const std::map<const J9ROMMethod *, std::string> &methodNames)
{
TR_VerboseLog::vlogAcquire();
TR_VerboseLog::writeLine(TR_Vlog_IPROFILER, "Top five methods in terms of samples in the last interval:");
for (auto it = _sortedList.cbegin(); it != _sortedList.cend(); ++it)
{
const J9ROMMethod *romMethod = it->first;
uint32_t numSamples = it->second;
auto methodEntryIter = methodNames.find(romMethod);
const std::string methodName = (methodEntryIter != methodNames.end()) ? methodEntryIter->second : "";
TR_VerboseLog::writeLine(TR_Vlog_IPROFILER, "%lu samples in method %p %s", (unsigned long)numSamples, romMethod, methodName.c_str());
}
TR_VerboseLog::vlogRelease();
}

#endif //#if defined(TRACK_SAMPLE_SOURCES)

//------------------------------ parseBuffer -------------------------
// Parses a buffer from the VM and populates a hash table
Expand Down Expand Up @@ -4414,7 +4469,39 @@ UDATA TR_IProfiler::parseBuffer(J9VMThread * vmThread, const U_8* dataStart, UDA

if (addSample && !verboseReparse)
{
profilingSample ((uintptr_t)pc, (uintptr_t)data, true);
TR_IPBytecodeHashTableEntry *entry = profilingSample ((uintptr_t)pc, (uintptr_t)data, true);
#if defined(TRACK_SAMPLE_SOURCES)
if (_trackProfilingActivity && entry && vmThread == _iprofilerThread) // only IProfiler thread is allowed to execute this)
{
// If this sample does not yet have the romMethod set, find it and set it
if (entry->getROMMethod() == NULL)
{
J9ROMClass *romClass = NULL;
J9ROMMethod *romMethod = findROMMethodFromPC(vmThread, (uintptr_t)pc, romClass);
entry->setROMMethod(romMethod);
// Determine the full method name and memorize it
if (romMethod && romClass)
{
auto methodIt = _methodNames.find(romMethod);
if (methodIt == _methodNames.end())
{
J9UTF8* className = J9ROMCLASS_CLASSNAME(romClass);
J9UTF8* name = J9ROMMETHOD_NAME(romMethod);
J9UTF8* signature = J9ROMMETHOD_SIGNATURE(romMethod);
std::string fullMethodName(utf8Data(className), J9UTF8_LENGTH(className));
size_t len = J9UTF8_LENGTH(className) + J9UTF8_LENGTH(name) + J9UTF8_LENGTH(signature) + 2;
fullMethodName.reserve(len);
fullMethodName.push_back('.');
fullMethodName.append(utf8Data(name), J9UTF8_LENGTH(name));
fullMethodName.append(utf8Data(signature), J9UTF8_LENGTH(signature));
_methodNames.insert({romMethod, fullMethodName});
}
}
}
// Search the hashtable for my method and increment the number of samples
_samplesPerMethod[entry->getROMMethod()]++;
}
#endif
records++;
}
}
Expand All @@ -4426,7 +4513,31 @@ UDATA TR_IProfiler::parseBuffer(J9VMThread * vmThread, const U_8* dataStart, UDA
return 0;
}
_iprofilerNumRecords += records;

#if defined(TRACK_SAMPLE_SOURCES)
// If enough time has passed since the last operation
// determine the top 5 romMethods with the most number of samples and
// print their names. Then clear _samplesPerMethod and start all over.
static uint64_t lastPrintTime = 0;
static uint64_t lastNumIPsamples = 0;
if (_trackProfilingActivity && vmThread == _iprofilerThread) // only IProfiler thread is allowed to execute this)
{
uint64_t crtTime = _compInfo->getPersistentInfo()->getElapsedTime();
if (crtTime - lastPrintTime >= 5000) // every 5 seconds or so
{
TopFiveMethods topFive;
for (auto it = _samplesPerMethod.begin(); it != _samplesPerMethod.end(); ++it)
topFive.processElement(*it);
TR_VerboseLog::writeLineLocked(TR_Vlog_IPROFILER, "t=%lu IP samples in the last interval = %lu Total CPU spent in IP thread = %u ms",
(unsigned long)crtTime, (unsigned long)(_iprofilerNumRecords - lastNumIPsamples),
(unsigned)(j9thread_get_self_cpu_time(j9thread_self())/1000000));
topFive.print(_methodNames);
// Prepare for next interval
_samplesPerMethod.clear();
lastPrintTime = crtTime;
lastNumIPsamples = _iprofilerNumRecords;
}
}
#endif
return records;
}

Expand Down
38 changes: 36 additions & 2 deletions runtime/compiler/runtime/IProfiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@
* Interpreter Profiling Framework" by Ian Gartley, Marius Pirvu, Vijay
* Sundaresan and Nikola Grecvski, published in *Code Generation and Optimization (CGO)*, 2013.
*/
#define TRACK_SAMPLE_SOURCES
#if defined(TRACK_SAMPLE_SOURCES)
#include <list>
#include <string>
#include <map>
#endif // defined(TRACK_SAMPLE_SOURCES)

#include "j9.h"
#include "j9cfg.h"
Expand Down Expand Up @@ -196,9 +202,18 @@ class TR_IPBytecodeHashTableEntry
void * operator new (size_t size, void * placement) {return placement;}
void operator delete(void *p, void *) {}

TR_IPBytecodeHashTableEntry(uintptr_t pc) : _next(NULL), _pc(pc), _lastSeenClassUnloadID(-1), _entryFlags(0), _persistFlags(IPBC_ENTRY_CAN_PERSIST_FLAG) {}
TR_IPBytecodeHashTableEntry(uintptr_t pc) : _next(NULL), _pc(pc), _lastSeenClassUnloadID(-1), _entryFlags(0), _persistFlags(IPBC_ENTRY_CAN_PERSIST_FLAG)
{
#if defined(TRACK_SAMPLE_SOURCES)
_romMethod = NULL;
#endif
}
virtual ~TR_IPBytecodeHashTableEntry() {}
uintptr_t getPC() const { return _pc; }
#if defined(TRACK_SAMPLE_SOURCES)
J9ROMMethod *getROMMethod() const { return _romMethod; }
void setROMMethod(J9ROMMethod *method) { _romMethod = method; }
#endif // TRACK_SAMPLE_SOURCES
TR_IPBytecodeHashTableEntry * getNext() const { return _next; }
void setNext(TR_IPBytecodeHashTableEntry *n) { _next = n; }
int32_t getLastSeenClassUnloadID() const { return _lastSeenClassUnloadID; }
Expand Down Expand Up @@ -243,6 +258,9 @@ class TR_IPBytecodeHashTableEntry
protected:
TR_IPBytecodeHashTableEntry *_next;
uintptr_t _pc;
#if defined(TRACK_SAMPLE_SOURCES)
J9ROMMethod *_romMethod; // romMethod containing the _pc
#endif // TRACK_SAMPLE_SOURCES
int32_t _lastSeenClassUnloadID;

enum TR_PersistenceFlags
Expand Down Expand Up @@ -818,6 +836,22 @@ class TR_IProfiler : public TR_ExternalProfiler

volatile TR_IprofilerThreadLifetimeStates _iprofilerThreadLifetimeState;

#if defined(TRACK_SAMPLE_SOURCES)
bool _trackProfilingActivity;
// Maps ROMMethods to number of samples. Uses malloc underneath
std::map<const J9ROMMethod *, uint32_t> _samplesPerMethod;
// Maps ROMMethods to their names; this avoids any class unloading concerns
std::map<const J9ROMMethod *, std::string> _methodNames;
class TopFiveMethods
{
public:
void processElement(const std::pair<const J9ROMMethod*, uint32_t> & newElem);
void print(const std::map<const J9ROMMethod *, std::string> &names);
private:
std::list<std::pair<const J9ROMMethod*, uint32_t> > _sortedList;
}; // class TopFiveMethods
#endif

public:
static int32_t _STATS_noProfilingInfo;
static int32_t _STATS_doesNotWantToGiveProfilingInfo;
Expand Down Expand Up @@ -852,7 +886,7 @@ class TR_IProfiler : public TR_ExternalProfiler

static int32_t _STATS_IPEntryRead;
static int32_t _STATS_IPEntryChoosePersistent;
};
}; // TR_IProfiler

void printIprofilerStats(TR::Options *options, J9JITConfig * jitConfig, TR_IProfiler *iProfiler, const char *event);
void turnOffInterpreterProfiling(J9JITConfig *jitConfig);
Expand Down

0 comments on commit 15b5abe

Please sign in to comment.