Skip to content

Commit

Permalink
Feature profiling workflow (cms-patatrack#18)
Browse files Browse the repository at this point in the history
* Create profiling folder

* Fix `VecArray` constructor for `Point`

* Error handling in `read_csv`

* Rename file

* Create workflow for profiling serial code

* Fix typo

* Add workflow that uploads the profiler logs as artifacts

* Update gitignore

* Fix wrong image usage for profiling

* Fix typo

* Formatting

* Update CMake file for including tbb and cuda

* Remove unneeded include

* Add annotations in profiling `main.cc`

* Add readme with descriptions on how to compile

* Fix typo
  • Loading branch information
sbaldu authored Mar 19, 2024
1 parent 1407ddd commit ad43e2b
Show file tree
Hide file tree
Showing 11 changed files with 10,419 additions and 12 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/profile_serial.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: Profile serial code and save log

# The workflow gets triggered by pushes and pull requests targeting main
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
      # check out the code in the repository, including submodules
      - uses: actions/checkout@v3
        with:
          submodules: true

      # pull docker images for building and profiling
      - name: Pull docker images from DockerHub
        run: |
          docker pull sbalducci00/sbaldu:cluestering_arch.serial
          docker pull sbalducci00/sbaldu:profiler

      # build the serial profiling binary inside the build image;
      # the repo root is mounted at /app and the build runs in /app/profiling
      - name: Build
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:cluestering_arch.serial bash -c '
            cmake -B build && make -C build
          '

      # run the instrumented binary (writes gmon.out, since it is built
      # with -pg) and print the gprof report to the job log
      - name: Profile
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:profiler bash -c '
            ./build/serial.out
            gprof -b ./build/serial.out
          '
46 changes: 46 additions & 0 deletions .github/workflows/upload_profile_log.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: Upload the logs of the profiler as artifacts

on:
  push:
    branches: [ "main" ]

jobs:
  build-and-deploy:
    runs-on: ubuntu-latest

    steps:
      # aligned with profile_serial.yml, which uses checkout@v3
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: true

      # pull docker images for building and profiling
      - name: Pull docker images from DockerHub
        run: |
          docker pull sbalducci00/sbaldu:cluestering_arch.serial
          docker pull sbalducci00/sbaldu:profiler

      - name: Build
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:cluestering_arch.serial bash -c '
            cmake -B build && make -C build
          '

      # working-directory must be the repo root: the container mounts
      # $(pwd) at /app and then enters /app/profiling, so mounting the
      # profiling folder itself would leave /app/profiling empty and the
      # built binary unreachable
      - name: Profile
        working-directory: ${{github.workspace}}
        run: |
          docker run -v $(pwd):/app -w /app/profiling \
            sbalducci00/sbaldu:profiler bash -c '
            ./build/serial.out
            mkdir -p logs
            gprof -b ./build/serial.out > logs/test.log
          '

      # this workflow only runs on push, where github.event.pull_request
      # is empty, so the artifact is named after the run id instead of a
      # (nonexistent) PR number
      - name: Archive log files
        uses: actions/upload-artifact@v4
        with:
          name: profile-log-${{ github.run_id }}
          path: |
            profiling/logs/test.log
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ build/
# Shared library files
*.so

# Profiling files
*out*
vgcore.*

# Executables
*.out
*run_tests.sh
Expand Down
4 changes: 2 additions & 2 deletions CLUEstering/CLUEstering.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
tbb_found = exists(str(*glob(join(path, 'lib/CLUE_CPU_TBB*.so'))))
if tbb_found:
import CLUE_CPU_TBB as cpu_tbb
cuda_found = exists(str(*glob(join(path, 'lib/CLUE_CPU_CUDA*.so'))))
cuda_found = exists(str(*glob(join(path, 'lib/CLUE_GPU_CUDA*.so'))))
if cuda_found:
import CLUE_GPU_CUDA as gpu_cuda
hip_found = exists(str(*glob(join(path, 'lib/CLUE_CPU_HIP*.so'))))
hip_found = exists(str(*glob(join(path, 'lib/CLUE_GPU_HIP*.so'))))
if hip_found:
import CLUE_GPU_HIP as gpu_hip

Expand Down
1 change: 0 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/chooseDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include "alpakaConfig.h"
#include "alpakaDevices.h"
#include "Framework/Event.h"

namespace cms::alpakatools {

Expand Down
25 changes: 16 additions & 9 deletions CLUEstering/alpaka/DataFormats/Points.h
Original file line number Diff line number Diff line change
@@ -1,25 +1,32 @@
#ifndef points_h
#define points_h

#include <array>
#include "alpaka/AlpakaVecArray.h"
#include "alpaka/PointsAlpaka.h"
#include <algorithm>
#include <array>
#include <cmath>
#include <functional>
#include <iostream>
#include <vector>
#include "alpaka/PointsAlpaka.h"
#include "alpaka/AlpakaVecArray.h"

using cms::alpakatools::VecArray;

template <uint8_t Ndim>
struct Points {
template <uint8_t Ndim> struct Points {
Points() = default;
Points(const std::vector<VecArray<float, Ndim>>& coords, const std::vector<float>& weight)
: m_coords{coords}, m_weight{weight}, n{weight.size()} {}
Points(const std::vector<std::vector<float>>& coords, const std::vector<float>& weight)
Points(const std::vector<VecArray<float, Ndim>> &coords,
const std::vector<float> &weight)
: m_coords{coords}, m_weight{weight}, n{weight.size()} {
m_rho.resize(n);
m_delta.resize(n);
m_nearestHigher.resize(n);
m_clusterIndex.resize(n);
m_isSeed.resize(n);
}
Points(const std::vector<std::vector<float>> &coords,
const std::vector<float> &weight)
: m_weight{weight}, n{weight.size()} {
for (const auto& x : coords) {
for (const auto &x : coords) {
VecArray<float, Ndim> temp_vecarray;
for (auto value : x) {
temp_vecarray.push_back_unsafe(value);
Expand Down
146 changes: 146 additions & 0 deletions profiling/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
cmake_minimum_required(VERSION 3.16.0)
project(Profiling LANGUAGES CXX)

# C++ standard shared by every backend
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# warnings + optimisation; -g keeps the symbols gprof needs
set(CMAKE_CXX_FLAGS "-Wall -Wextra -g -O2")

# header-only include paths: the alpaka extern submodule and the
# CLUEstering sources (anchored to this file, not the working directory)
set(alpaka_include_dir "${PROJECT_SOURCE_DIR}/../extern/alpaka/include")
set(clue_include_dir "${PROJECT_SOURCE_DIR}/../CLUEstering/alpaka")

# look for boost
find_package(Boost 1.75.0)

# if boost is not found, it's fetched from the official boost repository
if(NOT Boost_FOUND)
  include(FetchContent)
  FetchContent_Declare(
    boost
    URL https://boostorg.jfrog.io/artifactory/main/release/1.76.0/source/boost_1_76_0.tar.gz
  )

  FetchContent_GetProperties(boost)
  if(NOT boost_POPULATED)
    FetchContent_Populate(boost)
  endif()
  # boost is consumed header-only here, so the unpacked source tree is enough
  set(Boost_PATH ./build/_deps/boost-src)
else()
  set(Boost_PATH ${Boost_INCLUDE_DIRS})
endif()

# ---------------------------------------------------------------------------
# CPU serial backend
# ---------------------------------------------------------------------------
add_executable(serial.out main.cc)
target_include_directories(
  serial.out PRIVATE ${alpaka_include_dir} ${clue_include_dir} ${Boost_PATH})
target_link_libraries(serial.out PRIVATE ${Boost_LIBRARIES})
# alpaka backend selection macros
target_compile_definitions(
  serial.out
  PRIVATE ALPAKA_HOST_ONLY ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
          ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
          ALPAKA_ACC_CPU_B_SEQ_T_SEQ_SYNC_BACKEND)
# any non-empty -DANNOTATE=... on the cmake command line enables the
# timing print-outs in main.cc
if(NOT "${ANNOTATE}" STREQUAL "")
  target_compile_definitions(serial.out PRIVATE ANNOTATE)
endif()
# instrument for gprof
target_compile_options(serial.out PRIVATE -pg)
target_link_options(serial.out PRIVATE -pg)

# ---------------------------------------------------------------------------
# CPU TBB backend (built only when TBB is available)
# ---------------------------------------------------------------------------
find_package(TBB)

if(TBB_FOUND)
  add_executable(tbb.out main.cc)
  target_include_directories(
    tbb.out PRIVATE ${alpaka_include_dir} ${clue_include_dir} ${Boost_PATH})
  # link through the imported target, which also carries TBB's include
  # paths (the previous -ltbb passed as a *compile* option did nothing)
  target_link_libraries(tbb.out PRIVATE ${Boost_LIBRARIES} TBB::tbb)
  # alpaka backend selection macros
  target_compile_definitions(
    tbb.out
    PRIVATE ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
            ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
            ALPAKA_ACC_CPU_B_TBB_T_SEQ_ASYNC_BACKEND)
  if(NOT "${ANNOTATE}" STREQUAL "")
    target_compile_definitions(tbb.out PRIVATE ANNOTATE)
  endif()
  # BUGFIX: these flags were previously applied to serial.out a second
  # time, leaving tbb.out without gprof instrumentation
  target_compile_options(tbb.out PRIVATE -pg)
  target_link_options(tbb.out PRIVATE -pg)
endif()

# ---------------------------------------------------------------------------
# GPU CUDA backend (built only when a CUDA compiler is available)
# ---------------------------------------------------------------------------
include(CheckLanguage)
check_language(CUDA)

if(CMAKE_CUDA_COMPILER)
  # enable CUDA
  enable_language(CUDA)
  set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_COMPILER})

  # set the CUDA standard to match the C++ one
  if(NOT DEFINED CMAKE_CUDA_STANDARD)
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  endif()

  set(CMAKE_CUDA_FLAGS "-Wall -Wextra -g -O2")

  # compile the file with .cc extension using nvcc
  set_source_files_properties(main.cc PROPERTIES LANGUAGE CUDA)
  add_executable(cuda.out main.cc)
  target_include_directories(
    cuda.out PRIVATE ${alpaka_include_dir} ${clue_include_dir} ${Boost_PATH})
  target_link_libraries(cuda.out PRIVATE ${Boost_LIBRARIES})
  # set the cuda architectures
  set_target_properties(cuda.out PROPERTIES CUDA_ARCHITECTURES
                                            "50;60;61;62;70")
  # alpaka backend selection macros
  target_compile_definitions(
    cuda.out PRIVATE ALPAKA_ACC_GPU_CUDA_PRESENT ALPAKA_ACC_GPU_CUDA_ENABLED
                     ALPAKA_ACC_GPU_CUDA_ASYNC_BACKEND)
  # nvcc-specific compilation flags
  target_compile_options(
    cuda.out PRIVATE --expt-relaxed-constexpr -gencode
                     arch=compute_61,code=[sm_61,compute_61])
  if(NOT "${ANNOTATE}" STREQUAL "")
    target_compile_definitions(cuda.out PRIVATE ANNOTATE)
  endif()
  target_compile_options(cuda.out PRIVATE -pg)
  target_link_options(cuda.out PRIVATE -pg)
endif()

# TODO(review): a GPU HIP backend (hip.out) was previously sketched here as
# commented-out code; re-add it behind check_language(HIP) mirroring the
# CUDA section once a ROCm toolchain is available.

14 changes: 14 additions & 0 deletions profiling/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# Profiling
## How to compile
The profiling code can be compiled automatically for all the supported backends with CMake
using the command:
```bash
cmake -B build && make -C build
```
The code contains print-outs that report the execution times of the different
setup stages that run before the algorithm starts. These can be enabled by
passing an additional flag to the previous command:
```bash
cmake -B build -DANNOTATE=ON && make -C build
```
Loading

0 comments on commit ad43e2b

Please sign in to comment.