Change inliner estimate code size heuristic

Do not reset the _analyzedSize value if there was a large callee
or estimation failed for some other reason.
Keep track of all the code we have analyzed (including code that
we estimated and then discarded) so that we do not do an
unbounded amount of estimating on a given top level callee.
Introduce an "allowance" factor that gets applied to the
size threshold to separate how much we are allowed to analyze
from how much we are allowed to bring in as inlined call graph.
This commit should limit the amount of code we are allowed to
analyze to no more than 2x of what we are allowed to bring
into the compiled method via inlining.
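
As a sketch, the new gate amounts to a single comparison (names taken
from the diff below; the helper function itself is illustrative, not
part of the change):

   // The analysis budget is allowanceFactor * sizeThreshold, while the
   // inlining budget itself stays at sizeThreshold.
   static bool analysisBudgetExceeded(int32_t analyzedSize,
                                      int32_t sizeThreshold,
                                      int32_t allowanceFactor)
      {
      // _analyzedSize accumulates everything we have estimated, even
      // code that was later discarded during backtracking.
      return analyzedSize > allowanceFactor * sizeThreshold;
      }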

Added a new env var TR_AnalyzedAllowanceFactor that allows a
user to specify the factor by which we multiply the original
estimate size threshold to control how much we can analyze
even with backtracking.
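
The factor is read once with the usual feGetEnv pattern (as in the
.cpp diff below):

   static const char *af = feGetEnv("TR_AnalyzedAllowanceFactor");
   static const int32_t allowanceFactor =
      af ? atoi(af) : DEFAULT_ANALYZED_ALLOWANCE_FACTOR; // default 2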

Added a new env var TR_GraceInliningThreshold that controls
how big a callee is allowed to be in order to be inlined even
if the call graph size estimate is exceeded.
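
This knob is read the same way and guards the "inline anyway" path for
small callees (also as in the .cpp diff below):

   static const char *git = feGetEnv("TR_GraceInliningThreshold");
   static const int32_t graceInliningThreshold =
      git ? atoi(git) : DEFAULT_GRACE_INLINING_THRESHOLD; // default 100

   // A callee whose estimated size stays under the grace threshold may
   // be inlined even though the call graph size estimate was exceeded.
   if (estimatedSize < graceInliningThreshold)
      inlineAnyway = true;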

Misc. cleanups:
Fixed an inconsistency in how we reset variables during backtracking
Renamed _optimisticSize to _analyzedSize since it is less confusing
Fixed a typo in a variable name
Fixed some whitespace

Signed-off-by: Vijay Sundaresan <[email protected]>
vijaysun-omr committed Jan 18, 2025
1 parent 9362c83 commit 2a2ccc3
Showing 2 changed files with 53 additions and 44 deletions.
91 changes: 50 additions & 41 deletions runtime/compiler/optimizer/J9EstimateCodeSize.cpp
@@ -54,6 +54,9 @@ const float TR_J9EstimateCodeSize::CONST_ARG_IN_CALLEE_ADJUSTMENT_FACTOR = 0.75f

#define DEFAULT_FREQ_CUTOFF 40

#define DEFAULT_GRACE_INLINING_THRESHOLD 100

#define DEFAULT_ANALYZED_ALLOWANCE_FACTOR 2

/*
DEFINEs are ugly in general, but putting
@@ -563,7 +566,7 @@ TR_J9EstimateCodeSize::estimateCodeSize(TR_CallTarget *calltarget, TR_CallStack
{
heuristicTrace(tracer(),"Subtracting 1 from sizes because _isLeaf is true");
--_realSize;
--_optimisticSize;
--_analyzedSize;
}
return true;
}
@@ -993,7 +996,7 @@ TR_J9EstimateCodeSize::processBytecodeAndGenerateCFG(TR_CallTarget *calltarget,

/********* PHASE 2: Generate CFG **********/

heuristicTrace(tracer(),"--- Done Iterating over Bytecodes in call to %s. size = %d _recursionDepth = %d _optimisticSize = %d _realSize = %d _sizeThreshold = %d",callerName, size, _recursionDepth, _optimisticSize, _realSize, _sizeThreshold);
heuristicTrace(tracer(),"--- Done Iterating over Bytecodes in call to %s. size = %d _recursionDepth = %d _analyzedSize = %d _realSize = %d _sizeThreshold = %d",callerName, size, _recursionDepth, _analyzedSize, _realSize, _sizeThreshold);

if (hasThisCalls && calltarget->_calleeSymbol)
calltarget->_calleeSymbol->setHasThisCalls(true);
@@ -1272,7 +1275,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt

heuristicTrace(tracer(),
"*** Depth %d: ECS to begin for target %p signature %s size assuming we can partially inline (optimistic size) = %d total real size so far = %d sizeThreshold %d",
_recursionDepth, calltarget, callerName, _optimisticSize, _realSize,
_recursionDepth, calltarget, callerName, _analyzedSize, _realSize,
_sizeThreshold);

TR_ByteCodeInfo newBCInfo;
@@ -1328,10 +1331,10 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
TR_PrexArgInfo* argsFromSymbol = TR_PrexArgInfo::buildPrexArgInfoForMethodSymbol(methodSymbol, tracer());

if (!TR_PrexArgInfo::validateAndPropagateArgsFromCalleeSymbol(argsFromSymbol, calltarget->_ecsPrexArgInfo, tracer()))
{
{
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. Incompatible arguments", _recursionDepth, calltarget, callerName);
return returnCleanup(ECS_ARGUMENTS_INCOMPATIBLE);
}
}

NeedsPeekingHeuristic nph(calltarget, bci, methodSymbol, comp());
//this might be a little bit too verbose, so let's hide the heuristic's output behind this env var
@@ -1341,7 +1344,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
nph.setTracer(tracer());
}

bool wasPeekingSuccessfull = false;
bool wasPeekingSuccessful = false;

const static bool debugMHInlineWithOutPeeking = feGetEnv("TR_DebugMHInlineWithOutPeeking") ? true: false;
bool mhInlineWithPeeking = comp()->getOption(TR_DisableMHInlineWithoutPeeking);
@@ -1373,7 +1376,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
{
heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- peeking was successfull for calltarget %p", _recursionDepth, calltarget);
_inliner->getUtil()->clearArgInfoForNonInvariantArguments(calltarget->_ecsPrexArgInfo, methodSymbol, tracer());
wasPeekingSuccessfull = true;
wasPeekingSuccessful = true;
}
}
else if (inlineArchetypeSpecimen && !mhInlineWithPeeking && debugMHInlineWithOutPeeking)
@@ -1426,7 +1429,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}

bool callsitesAreCreatedFromTrees = false;
if (wasPeekingSuccessfull
if (wasPeekingSuccessful
&& comp()->getOrCreateKnownObjectTable()
&& calltarget->_calleeMethod->convertToMethod()->isArchetypeSpecimen())
{
@@ -1503,7 +1506,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}
#endif // JAVA_SPEC_VERSION >= 21

if (!bci.findAndCreateCallsitesFromBytecodes(wasPeekingSuccessfull, iteratorWithState))
if (!bci.findAndCreateCallsitesFromBytecodes(wasPeekingSuccessful, iteratorWithState))
{
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. bci.findAndCreateCallsitesFromBytecode failed", _recursionDepth, calltarget, callerName);
return returnCleanup(ECS_CALLSITES_CREATION_FAILED);
@@ -1582,7 +1585,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}


/*************** PHASE 3: Optimistically Assume we can partially inline calltarget and add to an optimisticSize ******************/
/*************** PHASE 3: Optimistically Assume we can partially inline calltarget and add to an analyzedSize ******************/

TR_Queue<TR::Block> callBlocks(comp()->trMemory());
bool isCandidate = trimBlocksForPartialInlining(calltarget, &callBlocks);
@@ -1599,20 +1602,23 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}

if (isCandidate)
_optimisticSize += calltarget->_partialSize;
_analyzedSize += calltarget->_partialSize;
else
_optimisticSize += calltarget->_fullSize;
_analyzedSize += calltarget->_fullSize;

int32_t sizeThreshold = _sizeThreshold;
if (isCandidate)
sizeThreshold = std::max(4096, sizeThreshold);
///if(_optimisticSize > _sizeThreshold) // even optimistically we've blown our budget
heuristicTrace(tracer(),"--- Depth %d: Checking Optimistic size vs Size Threshold: _optimisticSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _optimisticSize, _sizeThreshold, sizeThreshold);
///if(_analyzedSize > _sizeThreshold) // even optimistically we've blown our budget
heuristicTrace(tracer(),"--- Depth %d: Checking Analyzed size vs Size Threshold: _analyzedSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _analyzedSize, _sizeThreshold, sizeThreshold);

static const char *af = feGetEnv("TR_AnalyzedAllowanceFactor");
static const int32_t allowanceFactor = af ? atoi(af) : DEFAULT_ANALYZED_ALLOWANCE_FACTOR;

if (_optimisticSize > sizeThreshold) // even optimistically we've blown our budget
if (_analyzedSize > allowanceFactor*sizeThreshold) // even optimistically we've blown our budget
{
calltarget->_isPartialInliningCandidate = false;
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. optimisticSize exceeds Size Threshold", _recursionDepth, calltarget, callerName);
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. analyzedSize exceeds Size Threshold", _recursionDepth, calltarget, callerName);
return returnCleanup(ECS_OPTIMISTIC_SIZE_THRESHOLD_EXCEEDED);
}

@@ -1712,26 +1718,26 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
continue;
}

if (_optimisticSize <= sizeThreshold) // for multiple calltargets, is this the desired behaviour?
if (_analyzedSize <= allowanceFactor*sizeThreshold) // for multiple calltargets, is this the desired behaviour?
{
_recursionDepth++;
_numOfEstimatedCalls++;

_lastCallBlockFrequency = currentBlock->getFrequency();

debugTrace(tracer(),"About to call ecs on call target %p at depth %d _optimisticSize = %d _realSize = %d _sizeThreshold = %d",
targetCallee, _recursionDepth, _optimisticSize, _realSize, _sizeThreshold);
debugTrace(tracer(),"About to call ecs on call target %p at depth %d _analyzedSize = %d _realSize = %d _sizeThreshold = %d",
targetCallee, _recursionDepth, _analyzedSize, _realSize, _sizeThreshold);
heuristicTrace(tracer(),"--- Depth %d: EstimateCodeSize to recursively estimate call from %s to %s",_recursionDepth, callerName, calleeName);

int32_t origOptimisticSize = _optimisticSize;
int32_t origAnalyzedSize = _analyzedSize;
int32_t origRealSize = _realSize;
bool prevNonColdCalls = _hasNonColdCalls;
bool estimateSuccess = estimateCodeSize(targetCallee, &callStack); //recurseDown = true
bool calltargetSetTooBig = false;
bool calleeHasNonColdCalls = _hasNonColdCalls;
_hasNonColdCalls = prevNonColdCalls;// reset the bool for the parent

// update optimisticSize and cull candidates
// update analyzedSize and cull candidates

if ((comp()->getMethodHotness() >= warm) && comp()->isServerInlining())
{
@@ -1770,48 +1776,51 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}


if (_optimisticSize - origOptimisticSize > bigCalleeThreshold)
if (_analyzedSize - origAnalyzedSize > bigCalleeThreshold)
{
///printf("set warmcallgraphtoobig for method %s at index %d\n", calleeName, newBCInfo._byteCodeIndex);fflush(stdout);
calltarget->_calleeMethod->setWarmCallGraphTooBig( newBCInfo.getByteCodeIndex(), comp());
heuristicTrace(tracer(), "set warmcallgraphtoobig for method %s at index %d\n", calleeName, newBCInfo.getByteCodeIndex());
//_optimisticSize = origOptimisticSize;
//_analyzedSize = origAnalyzedSize;
//_realSize = origRealSize;
calltargetSetTooBig = true;

}
}

if (!estimateSuccess && !calltargetSetTooBig)
{
int32_t estimatedSize = (_optimisticSize - origOptimisticSize);
int32_t estimatedSize = (_analyzedSize - origAnalyzedSize);
int32_t bytecodeSize = targetCallee->_calleeMethod->maxBytecodeIndex();
bool inlineAnyway = false;

if ((_optimisticSize - origOptimisticSize) < 40)
static const char *git = feGetEnv("TR_GraceInliningThreshold");
static const int32_t graceInliningThreshold = git ? atoi(git) : DEFAULT_GRACE_INLINING_THRESHOLD;

if (estimatedSize < graceInliningThreshold)
inlineAnyway = true;
else if (estimatedSize < 100)
{
if ((estimatedSize < bytecodeSize) || ((bytecodeSize - estimatedSize)< 20))
inlineAnyway = true;
}

// non cold calls are checked here probably since we did not add any call sites from the callee that failed estimation,
// and so we are making sure that we did not miss out on anything important (non cold)
//
if (inlineAnyway && !calleeHasNonColdCalls)
{
_optimisticSize = origOptimisticSize;
// This resetting is probably needed on this path since we are inlining despite exceeding some condition/threshold
// and so it would be an odd state to carry on with _analyzedSize being potentially more than sizeThreshold
//
_analyzedSize = origAnalyzedSize;
_realSize = origRealSize;
}
else if (!_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))
{
calltarget->_isPartialInliningCandidate = false;
callSites[i]->removecalltarget(j, tracer(),
Callee_Too_Many_Bytecodes);
_optimisticSize = origOptimisticSize;
//_analyzedSize = origAnalyzedSize;
_realSize = origRealSize;
calltarget->addDeadCallee(callSites[i]);
j--;
_numOfEstimatedCalls--;
heuristicTrace(tracer(),"Depth %d: estimateCodeSize skipping estimated call and resetting _optimisticSize to %d and _realSize to %d", _recursionDepth, _optimisticSize, _realSize);
heuristicTrace(tracer(),"Depth %d: estimateCodeSize skipping estimated call and resetting _analyzedSize to %d and _realSize to %d", _recursionDepth, _analyzedSize, _realSize);
}

if(comp()->getVisitCount() > HIGH_VISIT_COUNT)
@@ -1822,19 +1831,19 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}
else if (calltargetSetTooBig)
{
_optimisticSize = origOptimisticSize;
_realSize = origRealSize;

if (!_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))
{
calltarget->_isPartialInliningCandidate = false;
callSites[i]->removecalltarget(j, tracer(),
Callee_Too_Many_Bytecodes);
//_analyzedSize = origAnalyzedSize;
_realSize = origRealSize;
calltarget->addDeadCallee(callSites[i]);
j--;
_numOfEstimatedCalls--;

heuristicTrace(tracer(),"Depth %d: estimateCodeSize skipping too big estimated call and resetting _optimisticSize to %d and _realSize to %d", _recursionDepth, _optimisticSize, _realSize);

heuristicTrace(tracer(),"Depth %d: estimateCodeSize skipping too big estimated call and resetting _analyzedSize to %d and _realSize to %d", _recursionDepth, _analyzedSize, _realSize);
}

if(comp()->getVisitCount() > HIGH_VISIT_COUNT)
@@ -1848,7 +1857,7 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
}
else
{
heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to _optimisticSize: %d > sizeThreshold: %d", _recursionDepth, _optimisticSize,sizeThreshold);
heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to _analyzedSize: %d > sizeThreshold: %d", _recursionDepth, _analyzedSize,sizeThreshold);
break;
}
}
@@ -1857,8 +1866,8 @@ TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallSt
{
calltarget->addCallee(callSites[i]);
heuristicTrace(tracer(), "Depth %d: Subtracting %d from optimistic and real size to account for eliminating call", _recursionDepth, bci.estimatedCodeSize());
if (_optimisticSize > bci.estimatedCodeSize())
_optimisticSize -= bci.estimatedCodeSize(); // subtract what we added before for the size of the call instruction
if (_analyzedSize > bci.estimatedCodeSize())
_analyzedSize -= bci.estimatedCodeSize(); // subtract what we added before for the size of the call instruction
if (_realSize > bci.estimatedCodeSize())
_realSize -= bci.estimatedCodeSize();
}
6 changes: 3 additions & 3 deletions runtime/compiler/optimizer/J9EstimateCodeSize.hpp
@@ -43,9 +43,9 @@ class TR_J9EstimateCodeSize : public TR_EstimateCodeSize
{
public:

TR_J9EstimateCodeSize() : TR_EstimateCodeSize(), _optimisticSize(0), _lastCallBlockFrequency(-1) { }
TR_J9EstimateCodeSize() : TR_EstimateCodeSize(), _analyzedSize(0), _lastCallBlockFrequency(-1) { }

int32_t getOptimisticSize() { return _optimisticSize; }
int32_t getOptimisticSize() { return _analyzedSize; }

/** \brief
* The inliner weight adjustment factor used for java/lang/String* compression related methods.
@@ -164,7 +164,7 @@ class TR_J9EstimateCodeSize : public TR_EstimateCodeSize


int32_t _lastCallBlockFrequency;
int32_t _optimisticSize; // size if we assume we are doing a partial inline
int32_t _analyzedSize; // size if we assume we are doing a partial inline
};

#define NUM_PREV_BC 5