Skip to content
Cameron Smith edited this page Jul 6, 2015 · 8 revisions

Table of Contents

The following instructions are for execution on the Stampede Phi/MIC only; the host processors are not used.

See the Stampede wiki page for system details: https://portal.xsede.org/tacc-stampede

Build

Set up the environment

# Swap the loaded intel module for intel/15.0.2.
module swap intel intel/15.0.2
# Swap the mvapich2 MPI module for Intel MPI (impi/5.0.2).
module swap mvapich2 impi/5.0.2
# Make the cmake command available for the configure step.
module load cmake

Create a toolchain file

Create 'StampedePhi.cmake' with the following contents

# CMake toolchain file for building with the Intel compilers for the
# Stampede Xeon Phi (MIC), as described on this page.

# Setting CMAKE_SYSTEM_NAME in a toolchain file puts CMake into
# cross-compiling mode.
set(CMAKE_SYSTEM_NAME Linux)
# Intel C, C++ and Fortran compiler drivers (looked up on PATH).
set(CMAKE_C_COMPILER icc)
set(CMAKE_CXX_COMPILER icpc)
set(CMAKE_Fortran_COMPILER ifort)
# Pin the archiver to the system ar; FORCE overwrites any cached value.
set(CMAKE_AR /usr/bin/ar CACHE STRING "" FORCE)

# Site-specific install prefixes for the Intel compiler and Intel MPI.
# NOTE(review): these presumably correspond to the intel/15.0.2 and
# impi/5.0.2 modules; update them if different module versions are used.
set(intel "/opt/apps/intel/15/composer_xe_2015.2.164")
set(impi "/opt/apps/intel15/impi/5.0.2.044")
# Presumably the MIC builds of the compiler runtime libraries; it is passed
# to the linker below via -rpath-link.
set(compilermic "${intel}/compiler/lib/mic")
# -mmic          : generate code for the MIC instead of the host
# -rpath-link    : extra directory the linker searches when resolving
#                  shared libraries required by other shared libraries
# --as-needed    : only record a dependency on shared libraries that are
#                  actually used
set(cxxflags "-mmic -Wl,-rpath-link=${compilermic} -Wl,--as-needed ")
# Intel-specific optimization options; only used in the *_FLAGS_RELEASE
# variables below.
set(opt "-opt-assume-safe-padding -opt-streaming-stores always -opt-streaming-cache-evict=0")

# Flags applied to every build type. FORCE makes these values replace
# whatever is already stored in the cache.
set(CMAKE_C_FLAGS -mmic CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS "${cxxflags}" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS -mmic CACHE STRING "" FORCE)

# Additional flags for CMAKE_BUILD_TYPE=Release. The Fortran flags also
# request 64-byte array alignment (-align array64byte).
set(CMAKE_C_FLAGS_RELEASE "-mmic ${opt} " CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-mmic ${opt} " CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "-mmic ${opt} -align array64byte " CACHE STRING "" FORCE)

# Root directories used by the find_* commands: the k1om (MIC) system tree
# and the MIC directories of the Intel MPI installation.
set(CMAKE_FIND_ROOT_PATH 
    /usr/linux-k1om-4.7/linux-k1om/usr/lib64/
    /usr/linux-k1om-4.7/linux-k1om/usr/
    ${impi}/mic/lib/release_mt
    ${impi}/mic/lib/
    ${impi}/mic/)

# Search for libraries and headers ONLY beneath the roots listed above.
# NOTE(review): CMAKE_FIND_ROOT_PATH_MODE_PROGRAM is not set, so program
# lookups keep CMake's default mode; confirm whether NEVER (host programs
# only) would be more appropriate for this build.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

Create a build directory

# Create a separate build directory and work from inside it.
mkdir buildPhi
cd buildPhi

Run CMake

# Configure a Release build using the toolchain file located one directory up,
# with PHASTA_COMPRESSIBLE enabled and PHASTA_INCOMPRESSIBLE disabled.
# The final '..' argument is the path to the source tree.
cmake \
  -DCMAKE_TOOLCHAIN_FILE=../StampedePhi.cmake \
  -DCMAKE_BUILD_TYPE=Release \
  -DPHASTA_INCOMPRESSIBLE=OFF \
  -DPHASTA_COMPRESSIBLE=ON \
  ..

Build

# VERBOSE=1 makes the generated Makefiles print each full compile and link command.
make VERBOSE=1

Run

Set up the environment

# Swap the loaded intel module for intel/15.0.2.
module swap intel intel/15.0.2
# Swap the mvapich2 MPI module for Intel MPI (impi/5.0.2).
module swap mvapich2 impi/5.0.2

Create a run script

Create 'runPhi.sh' with the following contents

#!/bin/bash
# Force Intel MPI to use the eager protocol by raising every eager threshold
# to 16 GiB (expressed in bytes). With impi 4 this allowed MPI jobs with more
# than 1024 ranks to run; it is not known whether impi 5 still needs it.
eager_threshold=$((16 * 1024 ** 3))

export I_MPI_EAGER_THRESHOLD=$eager_threshold \
       I_MPI_INTRANODE_EAGER_THRESHOLD=$eager_threshold
# The same two thresholds under the MIC_ prefix.
export MIC_I_MPI_EAGER_THRESHOLD=$eager_threshold \
       MIC_I_MPI_INTRANODE_EAGER_THRESHOLD=$eager_threshold

# TotalNumProcesses and NumProcessesPerPhi are placeholders: substitute the
# total process count and the number of processes per Phi for the job.
export MIC_MY_NSLOTS=TotalNumProcesses MIC_PPN=NumProcessesPerPhi

# Launch the executable from the buildPhi tree; adjust the path as needed.
ibrun.symm -m /path/to/buildPhi/bin/phastaC.exe

Submit the job

Note: '-n' specifies the total number of host processes. Since nothing runs on the host processors, set '-n' equal to the number of nodes; a value of zero is not valid.

cd path/to/case/directory # should contain an N-procs_case sub-directory
# sbatch options: -A account to charge, -p queue (partition), -N number of nodes,
# -n number of tasks (set equal to the node count here), -t run-time limit.
# numNodes and timeInMinutes are placeholders to be replaced with real values.
sbatch -A TG-CTS140031 -p development -N numNodes -n numNodes -t timeInMinutes ./runPhi.sh