Skip to content

Commit

Permalink
Feature profiling workflow (cms-patatrack#18)
Browse files Browse the repository at this point in the history
* Create profiling folder

* Fix `VecArray` constructor for `Point`

* Error handling in `read_csv`

* Rename file

* Create workflow for profiling serial code

* Fix typo

* Add workflow that uploads the profiler logs as artifacts

* Update gitignore

* Fix wrong image usage for profiling

* Fix typo

* Formatting

* Update CMake file for including tbb and cuda

* Remove unneeded include

* Add annotations in profiling `main.cc`

* Add readme with descriptions on how to compile

* Fix typo
  • Loading branch information
sbaldu authored Mar 19, 2024
1 parent 1407ddd commit ad43e2b
Show file tree
Hide file tree
Showing 11 changed files with 10,419 additions and 12 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/profile_serial.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: Profile serial code and save log

# The workflow gets triggered by pushes and pull requests targeting main
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      # check out the code in the repository, including submodules
      - uses: actions/checkout@v3
        with:
          submodules: true

      # pull docker images for building and profiling
      - name: Pull docker images from DockerHub
        run: |
          docker pull sbalducci00/sbaldu:cluestering_arch.serial
          docker pull sbalducci00/sbaldu:profiler

      # build the serial profiling binary inside the build image;
      # the repo root is mounted at /app and the build runs in /app/profiling
      - name: Build
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:cluestering_arch.serial bash -c '
            cmake -B build && make -C build
          '

      # run the instrumented binary (writes gmon.out, since it is built
      # with -pg) and print the gprof report to the job log
      - name: Profile
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:profiler bash -c '
            ./build/serial.out
            gprof -b ./build/serial.out
          '
46 changes: 46 additions & 0 deletions .github/workflows/upload_profile_log.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: Upload the logs of the profiler as artifacts

on:
  push:
    branches: [ "main" ]

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      # aligned with profile_serial.yml, which uses checkout@v3
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: true

      # pull docker images for building and profiling
      - name: Pull docker images from DockerHub
        run: |
          docker pull sbalducci00/sbaldu:cluestering_arch.serial
          docker pull sbalducci00/sbaldu:profiler

      - name: Build
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:cluestering_arch.serial bash -c '
            cmake -B build && make -C build
          '

      # working-directory must be the repo root: the container mounts
      # $(pwd) at /app and then enters /app/profiling, so mounting the
      # profiling folder itself would leave /app/profiling empty and the
      # built binary unreachable
      - name: Profile
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:profiler bash -c '
            ./build/serial.out
            mkdir -p logs
            gprof -b ./build/serial.out > logs/test.log
          '

      # this workflow only runs on push, where github.event.pull_request
      # is empty, so the artifact is named after the run id instead of a
      # (nonexistent) PR number
      - name: Archive log files
        uses: actions/upload-artifact@v4
        with:
          name: profile-log-${{ github.run_id }}
          path: |
            profiling/logs/test.log
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ build/
# Shared library files
*.so

# Profiling files
*out*
vgcore.*

# Executables
*.out
*run_tests.sh
Expand Down
4 changes: 2 additions & 2 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
tbb_found = exists(str(*glob(join(path, 'lib/CLUE_CPU_TBB*.so'))))
if tbb_found:
import CLUE_CPU_TBB as cpu_tbb
cuda_found = exists(str(*glob(join(path, 'lib/CLUE_CPU_CUDA*.so'))))
cuda_found = exists(str(*glob(join(path, 'lib/CLUE_GPU_CUDA*.so'))))
if cuda_found:
import CLUE_GPU_CUDA as gpu_cuda
hip_found = exists(str(*glob(join(path, 'lib/CLUE_CPU_HIP*.so'))))
hip_found = exists(str(*glob(join(path, 'lib/CLUE_GPU_HIP*.so'))))
if hip_found:
import CLUE_GPU_HIP as gpu_hip

Expand Down
1 change: 0 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/chooseDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include "alpakaConfig.h"
#include "alpakaDevices.h"
#include "Framework/Event.h"

namespace cms::alpakatools {

Expand Down
25 changes: 16 additions & 9 deletions CLUEstering/alpaka/DataFormats/Points.h
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
#ifndef points_h
#define points_h

#include <array>
#include "alpaka/AlpakaVecArray.h"
#include "alpaka/PointsAlpaka.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <functional>
#include <iostream>
#include <vector>
#include "alpaka/PointsAlpaka.h"
#include "alpaka/AlpakaVecArray.h"

using cms::alpakatools::VecArray;

template <uint8_t Ndim>
struct Points {
template <uint8_t Ndim> struct Points {
Points() = default;
Points(const std::vector<VecArray<float, Ndim>>& coords, const std::vector<float>& weight)
: m_coords{coords}, m_weight{weight}, n{weight.size()} {}
Points(const std::vector<std::vector<float>>& coords, const std::vector<float>& weight)
Points(const std::vector<VecArray<float, Ndim>> &coords,
const std::vector<float> &weight)
: m_coords{coords}, m_weight{weight}, n{weight.size()} {
m_rho.resize(n);
m_delta.resize(n);
m_nearestHigher.resize(n);
m_clusterIndex.resize(n);
m_isSeed.resize(n);
}
Points(const std::vector<std::vector<float>> &coords,
const std::vector<float> &weight)
: m_weight{weight}, n{weight.size()} {
for (const auto& x : coords) {
for (const auto &x : coords) {
VecArray<float, Ndim> temp_vecarray;
for (auto value : x) {
temp_vecarray.push_back_unsafe(value);
Expand Down
146 changes: 146 additions & 0 deletions profiling/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
cmake_minimum_required(VERSION 3.16.0)
project(Profiling LANGUAGES CXX)

# C++ standard shared by every backend
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# warnings + optimisation; -g keeps the symbols gprof needs
set(CMAKE_CXX_FLAGS "-Wall -Wextra -g -O2")

# header-only include paths: the alpaka extern submodule and the
# CLUEstering sources (anchored to this file, not the working directory)
set(alpaka_include_dir "${PROJECT_SOURCE_DIR}/../extern/alpaka/include")
set(clue_include_dir "${PROJECT_SOURCE_DIR}/../CLUEstering/alpaka")

# look for boost
find_package(Boost 1.75.0)

# if boost is not found, it's fetched from the official boost repository
if(NOT Boost_FOUND)
  include(FetchContent)
  FetchContent_Declare(
    boost
    URL https://boostorg.jfrog.io/artifactory/main/release/1.76.0/source/boost_1_76_0.tar.gz
  )

  FetchContent_GetProperties(boost)
  if(NOT boost_POPULATED)
    FetchContent_Populate(boost)
  endif()
  # boost is consumed header-only here, so the unpacked source tree is enough
  set(Boost_PATH ./build/_deps/boost-src)
else()
  set(Boost_PATH ${Boost_INCLUDE_DIRS})
endif()

# ---------------------------------------------------------------------------
# CPU serial backend
# ---------------------------------------------------------------------------
add_executable(serial.out main.cc)
target_include_directories(
  serial.out PRIVATE ${alpaka_include_dir} ${clue_include_dir} ${Boost_PATH})
target_link_libraries(serial.out PRIVATE ${Boost_LIBRARIES})
# alpaka backend selection macros
target_compile_definitions(
  serial.out
  PRIVATE ALPAKA_HOST_ONLY ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
          ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
          ALPAKA_ACC_CPU_B_SEQ_T_SEQ_SYNC_BACKEND)
# any non-empty -DANNOTATE=... on the cmake command line enables the
# timing print-outs in main.cc
if(NOT "${ANNOTATE}" STREQUAL "")
  target_compile_definitions(serial.out PRIVATE ANNOTATE)
endif()
# instrument for gprof
target_compile_options(serial.out PRIVATE -pg)
target_link_options(serial.out PRIVATE -pg)

# ---------------------------------------------------------------------------
# CPU TBB backend (built only when TBB is available)
# ---------------------------------------------------------------------------
find_package(TBB)

if(TBB_FOUND)
  add_executable(tbb.out main.cc)
  target_include_directories(
    tbb.out PRIVATE ${alpaka_include_dir} ${clue_include_dir} ${Boost_PATH})
  # link through the imported target, which also carries TBB's include
  # paths (the previous -ltbb passed as a *compile* option did nothing)
  target_link_libraries(tbb.out PRIVATE ${Boost_LIBRARIES} TBB::tbb)
  # alpaka backend selection macros
  target_compile_definitions(
    tbb.out
    PRIVATE ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
            ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
            ALPAKA_ACC_CPU_B_TBB_T_SEQ_ASYNC_BACKEND)
  if(NOT "${ANNOTATE}" STREQUAL "")
    target_compile_definitions(tbb.out PRIVATE ANNOTATE)
  endif()
  # BUGFIX: these flags were previously applied to serial.out a second
  # time, leaving tbb.out without gprof instrumentation
  target_compile_options(tbb.out PRIVATE -pg)
  target_link_options(tbb.out PRIVATE -pg)
endif()

# ---------------------------------------------------------------------------
# GPU CUDA backend (built only when a CUDA compiler is available)
# ---------------------------------------------------------------------------
include(CheckLanguage)
check_language(CUDA)

if(CMAKE_CUDA_COMPILER)
  # enable CUDA
  enable_language(CUDA)
  set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_COMPILER})

  # set the CUDA standard to match the C++ one
  if(NOT DEFINED CMAKE_CUDA_STANDARD)
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  endif()

  set(CMAKE_CUDA_FLAGS "-Wall -Wextra -g -O2")

  # compile the file with .cc extension using nvcc
  set_source_files_properties(main.cc PROPERTIES LANGUAGE CUDA)
  add_executable(cuda.out main.cc)
  target_include_directories(
    cuda.out PRIVATE ${alpaka_include_dir} ${clue_include_dir} ${Boost_PATH})
  target_link_libraries(cuda.out PRIVATE ${Boost_LIBRARIES})
  # set the cuda architectures
  set_target_properties(cuda.out PROPERTIES CUDA_ARCHITECTURES
                                            "50;60;61;62;70")
  # alpaka backend selection macros
  target_compile_definitions(
    cuda.out PRIVATE ALPAKA_ACC_GPU_CUDA_PRESENT ALPAKA_ACC_GPU_CUDA_ENABLED
                     ALPAKA_ACC_GPU_CUDA_ASYNC_BACKEND)
  # nvcc-specific compilation flags
  target_compile_options(
    cuda.out PRIVATE --expt-relaxed-constexpr -gencode
                     arch=compute_61,code=[sm_61,compute_61])
  if(NOT "${ANNOTATE}" STREQUAL "")
    target_compile_definitions(cuda.out PRIVATE ANNOTATE)
  endif()
  target_compile_options(cuda.out PRIVATE -pg)
  target_link_options(cuda.out PRIVATE -pg)
endif()

# TODO(review): a GPU HIP backend (hip.out) was previously sketched here as
# commented-out code; re-add it behind check_language(HIP) mirroring the
# CUDA section once a ROCm toolchain is available.

14 changes: 14 additions & 0 deletions profiling/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# Profiling
## How to compile
The profiling code can be compiled automatically for all the supported backends with CMake
using the command:
```bash
cmake -B build && make -C build
```
The code contains print-outs that report the execution times of the different
setup stages that run before the algorithm starts. These can be enabled by
passing an additional flag to the previous command:
```bash
cmake -B build -DANNOTATE=ON && make -C build
```
Loading

0 comments on commit ad43e2b

Please sign in to comment.