`BUILD_SHARED_LIBS`
: build shared libraries

`USE_TIMEMORY`
: enable timemory instrumentation

`USE_PROFILING`
: enable simple `MPI_Wtime` timer

`USE_CUDA`
: build the `matrix-add-gpu` executable

`USE_PAPI`
: enable PAPI counters

`USE_COMPILER_INSTRUMENTATION`
: enable timemory's compiler instrumentation
git clone <this-repo> slate-roofline-source
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${PWD}/slate-roofline -DUSE_CUDA=ON -B build-slate-roofline slate-roofline-source
cmake --build build-slate-roofline --target install --parallel 4
The build installed `matrix-add-cpu`, `matrix-add-gpu`, and `matrix-add-gpu-pinned` in the `slate-roofline/bin` folder.
export PATH=${PWD}/slate-roofline/bin:${PATH}
srun -n 8 -c 1 matrix-add-cpu
srun -n 8 -c 1 matrix-add-gpu
srun -n 8 -c 1 matrix-add-gpu-pinned