Optimize x86 pipeline on FirstFitPriority
ShikharJ committed Jan 5, 2021
1 parent 8bdab94 commit fbab6cb
Showing 5 changed files with 48 additions and 6 deletions.
2 changes: 1 addition & 1 deletion tools/SeeDot/seedot/compiler/codegen/codegenBase.py
@@ -838,7 +838,7 @@ def sortkey(a):
varToLiveRange.sort(key=sortkey, reverse=True)
memAlloc = [(l * m // 8, i, j) for ([i, j], k, l, m) in varToLiveRange if k not in self.notScratch]
varOrderAndSize = [(k, l * m // 8) for ([i, j], k, l, m) in varToLiveRange if k not in self.notScratch]
- maxAllowedMemUsage = 200000
+ maxAllowedMemUsage = Config.memoryLimit
timeout = 60
bestCaseMemUsage = DLXInputGen.generateDLXInput(memAlloc, 1, 0, True)
if maxAllowedMemUsage < bestCaseMemUsage:
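
The hunk above swaps the hardcoded 200000-byte budget for the new Config.memoryLimit knob defined in util.py below. As a rough, self-contained sketch of the data it operates on, here is a toy version of the memAlloc construction; the live-range field order ([start, end], name, bitwidth, element count) is an assumption inferred from l * m // 8 yielding a byte count.

# Toy reconstruction of the memAlloc tuples fed to DLXInputGen; the
# live-range field order is assumed, not confirmed by this commit.
notScratch = {"X"}
varToLiveRange = [
    ([0, 4], "W",   16, 1000),  # 16-bit tensor with 1000 elements
    ([2, 6], "tmp",  8,  256),  # 8-bit scratch buffer
    ([1, 3], "X",   16,  784),  # model input, excluded via notScratch
]
memAlloc = [(l * m // 8, i, j)
            for ([i, j], k, l, m) in varToLiveRange if k not in notScratch]
print(memAlloc)  # [(2000, 0, 4), (256, 2, 6)] as (bytes, liveStart, liveEnd)

DLXInputGen.generateDLXInput(memAlloc, 1, 0, True) then computes the best-case memory usage, which the guard above now compares against Config.memoryLimit instead of the old fixed constant.
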
11 changes: 11 additions & 0 deletions tools/SeeDot/seedot/compiler/compiler.py
@@ -31,6 +31,8 @@

import seedot.config as config

+ import numpy as np

# The Compiler class reads in the input code, converts it first into an AST, and subsequently into an IR which
# contains a sequence of function calls (which are implemented by hand in a library). The IR is fed into the
# desired target codegen, which outputs the C/C++ code which can be run on the target device.
@@ -55,6 +57,7 @@ def __init__(self, algo, version, target, inputFile, outputDir, profileLogFile,
self.generateAllFiles = generateAllFiles
self.id = str(id) if id is not None else ""
self.printSwitch = printSwitch
+ self.varSizes = {}

self.intermediateScales = {}
self.substitutions = substitutions
@@ -145,6 +148,14 @@ def genCodeWithFuncCalls(self, ast):
# All state variables are used for codegen.
state = [compiler.varDeclarations, compiler.varDeclarationsLocal, compiler.varScales, compiler.varIntervals, compiler.intConstants, compiler.expTables, compiler.globalVars, compiler.internalVars, compiler.floatConstants, compiler.substitutions, compiler.demotedVarsOffsets, compiler.varsForBitwidth, compiler.varLiveIntervals, compiler.notScratch, compiler.coLocatedVariables]

+ for key in compiler.varDeclarations.keys():
+     val = compiler.varDeclarations[key]
+     if type.isTensor(val):
+         dims = val.shape
+         self.varSizes[key] = np.prod(dims)
+     else:
+         self.varSizes[key] = 1

# Raw live ranges do not capture the scope of the first/last usage of a variable, so they require post-processing.
state[12] = self.adjustLiveRanges(state[12], compiler.allDepths)

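
The loop added above records how many elements each declared variable holds. Here is a minimal, runnable sketch of the same bookkeeping, with hasattr(val, "shape") standing in for SeeDot's type.isTensor check (a simplification for illustration).

# Minimal sketch of the varSizes bookkeeping added in genCodeWithFuncCalls;
# hasattr(val, "shape") is a stand-in for SeeDot's type.isTensor check.
import numpy as np

varDeclarations = {"W": np.zeros((10, 32)), "bias": 0.5}
varSizes = {}
for key, val in varDeclarations.items():
    if hasattr(val, "shape"):                   # tensor: product of its dims
        varSizes[key] = int(np.prod(val.shape))
    else:                                       # scalar: a single element
        varSizes[key] = 1
print(varSizes)  # {'W': 320, 'bias': 1}

These counts reach Main through obj.varSizes in main.py below, where they drive the order in which variables are tried for 8-bit demotion.
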
12 changes: 10 additions & 2 deletions tools/SeeDot/seedot/config.py
@@ -6,9 +6,11 @@
availableBitwidths = [8, 16, 32]

# Range of max scale factor used for exploration.
+ # In the old SeeDot (PLDI'19), this explores across the maxscale parameter.
+ # In the new SeeDot (OOPSLA'20), this explores across the scale of the input variable 'X'.
maxScaleRange = 0, -wordLength

- # TanH approximation limit.
+ # TanH approximation limit. Used by the old SeeDot (PLDI'19).
tanhLimit = 1.0

# MSBuild location
@@ -18,7 +20,7 @@
r"C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\MSBuild\Current\Bin\MSBuild.exe"
]

- # Not supported (ddsEnabled = False and vbwEnabled = True).
+ # IMPORTANT NOTE: Unsupported configuration (ddsEnabled = False and vbwEnabled = True).

# Enable data-driven scale computation. Setting this to False reverts the compiler to the old version (PLDI'19).
ddsEnabled = True
@@ -43,6 +45,12 @@
# Number of offsets tried out for each variable (except X, for which 9 are tried) when they are demoted to 8 bits one at a time.
offsetsPerDemotedVariable = 3

+ # For a classification algorithm, the fixed-point code may lose at most this much accuracy compared to the floating-point code. Not used for regression algorithms.
+ permittedClassificationAccuracyLoss = 2.0

+ # For a regression algorithm, the fixed-point code may incur at most this much additional numerical loss compared to the floating-point code. Not used for classification algorithms.
+ permittedRegressionNumericalLossMargin = 90.0

# The following classes are used as sanity checks for arguments passed to the compiler, to prevent unexpected arguments from being passed.
# These lists should be updated as the compiler is expanded to multiple algorithms and datasets.

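
The two thresholds added above replace the magic numbers 2.0 and 90.0 in performSearch (see the main.py hunks below). A toy illustration of both acceptance gates, with illustrative values; note that for regression the tracked metric is a numerical loss, so lower is better.

# Toy illustration of the acceptance gates these thresholds feed into.
permittedClassificationAccuracyLoss = 2.0      # max accuracy drop, percentage points
permittedRegressionNumericalLossMargin = 90.0  # max numerical loss for regression

flAccuracy, fixedAcc = 95.0, 93.5              # floating-point vs fixed-point accuracy
classificationOk = (flAccuracy - fixedAcc) <= permittedClassificationAccuracyLoss
print(classificationOk)  # True: a 1.5-point drop fits the 2.0-point budget

fixedLoss = 42.0                               # numerical loss of the fixed-point code
regressionOk = fixedLoss <= permittedRegressionNumericalLossMargin
print(regressionOk)      # True: stays under the 90.0 margin
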
25 changes: 22 additions & 3 deletions tools/SeeDot/seedot/main.py
@@ -84,6 +84,8 @@ def __init__(self, algo, version, target, trainingFile, testingFile, modelDir, s
self.biasShifts = {}
# For simplifying bias addition, populated after every code run, used for M3 codegen.
# In operations like WX + B, B is mostly used once in the code. So all the fixed point computations are clubbed into one.
+ self.varSizes = {}
+ # Map from a variable to the number of elements it holds. Populated in floating-point mode.

# This function is invoked right at the beginning for moving around files into the working directory.
def setup(self):
@@ -181,6 +183,7 @@ def compile(self, version, target, sf, generateAllFiles=True, id=None, printSwit
if version == config.Version.floatt:
self.variableSubstitutions = obj.substitutions
self.variableToBitwidthMap = dict.fromkeys(obj.independentVars, config.wordLength)
+ self.varSizes = obj.varSizes

self.problemType = obj.problemType
if id is None:
@@ -495,7 +498,23 @@ def performSearch(self):
totalSize = len(self.varDemoteDetails)
numBatches = int(np.ceil(totalSize / redBatchSize))

- sortedVars = [i for (i, j) in self.varDemoteDetails]
+ sortedVars1 = []
+ sortedVars2 = []
+ for ((demoteVars, offset), _) in self.varDemoteDetails:
+     variableInMap = False
+     for demoteVar in demoteVars:
+         if demoteVar in self.varSizes:
+             variableInMap = True
+             if self.varSizes[demoteVar] >= Util.Config.largeVariableLimit:
+                 sortedVars1.append((demoteVars, offset))
+                 break
+             else:
+                 sortedVars2.append((demoteVars, offset))
+                 break
+     if not variableInMap:
+         sortedVars2.append((demoteVars, offset))
+
+ sortedVars = sortedVars1 + sortedVars2

self.varDemoteDetails = []
demotedVarsOffsets = dict(self.demotedVarsOffsets)
@@ -544,9 +563,9 @@ def performSearch(self):
acceptedAcc = lastStageAcc
for ((demotedVars, _), metrics) in self.varDemoteDetails:
acc = metrics[0]
- if self.problemType == config.ProblemType.classification and (self.flAccuracy - acc) > 2.0:
+ if self.problemType == config.ProblemType.classification and (self.flAccuracy - acc) > config.permittedClassificationAccuracyLoss:
break
- elif self.problemType == config.ProblemType.regression and acc > 90.0:
+ elif self.problemType == config.ProblemType.regression and acc > config.permittedRegressionNumericalLossMargin:
break
else:
okToDemote = demotedVars
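
The key change in performSearch above partitions demotion candidates by size: any candidate group containing a variable with at least Util.Config.largeVariableLimit elements is explored first. A condensed sketch of that partition; unlike the code above, which classifies a group on the first variable it finds in the map, this version checks every variable, so it is a slight simplification.

# Condensed sketch of the size-based demotion ordering; varSizes and the
# candidate tuples mirror the structures used in performSearch.
largeVariableLimit = 50000
varSizes = {"W1": 120000, "B1": 128, "W2": 64000}
varDemoteDetails = [((["B1"], 0), None), ((["W1"], -1), None), ((["W2"], 2), None)]

large, small = [], []
for ((demoteVars, offset), _) in varDemoteDetails:
    known = [v for v in demoteVars if v in varSizes]
    if known and any(varSizes[v] >= largeVariableLimit for v in known):
        large.append((demoteVars, offset))   # big tensors: try demoting first
    else:
        small.append((demoteVars, offset))   # small or unknown: try later

sortedVars = large + small
print([vs for (vs, _) in sortedVars])  # [['W1'], ['W2'], ['B1']]

A plausible motivation: the largest tensors dominate the memory footprint, so demoting them first realizes most of the potential savings early in the search.
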
4 changes: 4 additions & 0 deletions tools/SeeDot/seedot/util.py
@@ -31,6 +31,10 @@ class Config:
# -> Similarly in multiplication-like functions such as convolution, Hadamard product, etc.
x86MemoryOptimize = True
# Enable memory optimization in the generated fixed-point code for the x86, Arduino, or M3 codegen.
+ memoryLimit = 200000
+ # The maximum memory available on the target device. Used if memory optimizations are enabled in the target codegen.
+ largeVariableLimit = 50000
+ # Any variable with more elements than this is prioritized for demotion to 8 bits.
defragmentEnabled = False
# Enable defragmentation. Currently not supported, so must be kept to False.
faceDetectionHacks = False
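
Both new knobs are plain class attributes on util.Config, so a build for a device with a different memory budget could presumably override them before compilation. This is assumed usage; the commit itself only defines the defaults.

# Hypothetical override for a smaller target; the attribute names are real,
# but setting them like this is assumed usage, not shown in this commit.
import seedot.util as Util

Util.Config.memoryLimit = 96000         # ~96 KB of usable RAM on the target
Util.Config.largeVariableLimit = 20000  # prioritize tensors with >= 20k elements
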
