From bd453730ba0b914a5b4b653874e5da804642c263 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Thu, 9 Jan 2025 17:15:15 +0100 Subject: [PATCH 01/19] update --- Examples/Python/src/ExaTrkXTrackFinding.cpp | 28 +++ Plugins/ExaTrkX/CMakeLists.txt | 31 ++- .../ExaTrkX/TensorRTEdgeClassifier.hpp | 61 ++++++ .../ExaTrkX/src/TensorRTEdgeClassifier.cpp | 190 ++++++++++++++++++ 4 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp create mode 100644 Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp diff --git a/Examples/Python/src/ExaTrkXTrackFinding.cpp b/Examples/Python/src/ExaTrkXTrackFinding.cpp index c7a30b51d2e..7fe63b76714 100644 --- a/Examples/Python/src/ExaTrkXTrackFinding.cpp +++ b/Examples/Python/src/ExaTrkXTrackFinding.cpp @@ -11,6 +11,7 @@ #include "Acts/Plugins/ExaTrkX/ExaTrkXPipeline.hpp" #include "Acts/Plugins/ExaTrkX/OnnxEdgeClassifier.hpp" #include "Acts/Plugins/ExaTrkX/OnnxMetricLearning.hpp" +#include "Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp" #include "Acts/Plugins/ExaTrkX/TorchEdgeClassifier.hpp" #include "Acts/Plugins/ExaTrkX/TorchMetricLearning.hpp" #include "Acts/Plugins/ExaTrkX/TorchTruthGraphMetricsHook.hpp" @@ -100,6 +101,33 @@ void addExaTrkXTrackFinding(Context &ctx) { ACTS_PYTHON_MEMBER(useEdgeFeatures); ACTS_PYTHON_STRUCT_END(); } +#endif + +#ifdef ACTS_EXATRKX_WITH_TENSORRT + { + using Alg = Acts::TensorRTEdgeClassifier; + using Config = Alg::Config; + + auto alg = + py::class_>( + mex, "TensorRTEdgeClassifier") + .def(py::init([](const Config &c, Logging::Level lvl) { + return std::make_shared( + c, getDefaultLogger("EdgeClassifier", lvl)); + }), + py::arg("config"), py::arg("level")) + .def_property_readonly("config", &Alg::config); + + auto c = py::class_(alg, "Config").def(py::init<>()); + ACTS_PYTHON_STRUCT_BEGIN(c, Config); + ACTS_PYTHON_MEMBER(modelPath); + ACTS_PYTHON_MEMBER(selectedFeatures); + ACTS_PYTHON_MEMBER(cut); + ACTS_PYTHON_MEMBER(deviceID); + ACTS_PYTHON_MEMBER(doSigmoid); + ACTS_PYTHON_STRUCT_END(); + } +#endif { using Alg = Acts::BoostTrackBuilding; diff --git a/Plugins/ExaTrkX/CMakeLists.txt b/Plugins/ExaTrkX/CMakeLists.txt index 802941e7367..417a6df73b5 100644 --- a/Plugins/ExaTrkX/CMakeLists.txt +++ b/Plugins/ExaTrkX/CMakeLists.txt @@ -22,7 +22,36 @@ if(ACTS_EXATRKX_ENABLE_TORCH) ) endif() -add_library(ActsPluginExaTrkX SHARED ${SOURCES}) +if(ACTS_EXATRKX_ENABLE_TORCH_AOT) + target_sources(ActsPluginExaTrkX PRIVATE src/TorchEdgeClassifierAOT.cpp) + target_compile_definitions( + ActsPluginExaTrkX + PUBLIC ACTS_EXATRKX_WITH_TORCH_AOT + ) + + add_library(TorchGnnAot SHARED IMPORTED) + set_target_properties( + TorchGnnAot + PROPERTIES + IMPORTED_LOCATION + "/root/reproduce_gnn_results/rel24/data/gnn.test.so" + ) + target_link_libraries(ActsPluginExaTrkX PUBLIC TorchGnnAot) +endif() + +if(ACTS_EXATRKX_ENABLE_TENSORRT) + find_package(TensorRT REQUIRED) + message(STATUS "Found TensorRT ${TensorRT_VERSION}") + target_link_libraries( + ActsPluginExaTrkX + PUBLIC trt::nvinfer trt::nvinfer_plugin + ) + target_sources(ActsPluginExaTrkX PRIVATE src/TensorRTEdgeClassifier.cpp) + target_compile_definitions( + ActsPluginExaTrkX + PUBLIC ACTS_EXATRKX_WITH_TENSORRT + ) +endif() target_include_directories( ActsPluginExaTrkX diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp new file mode 100644 index 00000000000..1eb24f5ade8 --- /dev/null +++ 
b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp
@@ -0,0 +1,61 @@
+// This file is part of the ACTS project.
+//
+// Copyright (C) 2016 CERN for the benefit of the ACTS project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#pragma once
+
+#include "Acts/Plugins/ExaTrkX/Stages.hpp"
+#include "Acts/Utilities/Logger.hpp"
+
+#include <memory>
+
+#include <torch/torch.h>
+
+namespace nvinfer1 {
+class IRuntime;
+class ICudaEngine;
+class ILogger;
+class IExecutionContext;
+}  // namespace nvinfer1
+
+namespace Acts {
+
+class TensorRTEdgeClassifier final : public Acts::EdgeClassificationBase {
+ public:
+  struct Config {
+    std::string modelPath;
+    std::vector<int> selectedFeatures = {};
+    float cut = 0.21;
+    int deviceID = 0;
+    bool useEdgeFeatures = false;
+    bool doSigmoid = true;
+  };
+
+  TensorRTEdgeClassifier(const Config &cfg,
+                         std::unique_ptr<const Acts::Logger> logger);
+  ~TensorRTEdgeClassifier();
+
+  std::tuple<std::any, std::any, std::any, std::any> operator()(
+      std::any nodeFeatures, std::any edgeIndex, std::any edgeFeatures = {},
+      const ExecutionContext &execContext = {}) override;
+
+  Config config() const { return m_cfg; }
+  torch::Device device() const override { return torch::kCUDA; };
+
+ private:
+  std::unique_ptr<const Acts::Logger> m_logger;
+  const auto &logger() const { return *m_logger; }
+
+  Config m_cfg;
+
+  std::unique_ptr<nvinfer1::IRuntime> m_runtime;
+  std::unique_ptr<nvinfer1::ICudaEngine> m_engine;
+  std::unique_ptr<nvinfer1::ILogger> m_trtLogger;
+  std::unique_ptr<nvinfer1::IExecutionContext> m_context;
+};
+
+}  // namespace Acts
diff --git a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp
new file mode 100644
index 00000000000..ef5d058d54c
--- /dev/null
+++ b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp
@@ -0,0 +1,190 @@
+// This file is part of the ACTS project.
+//
+// Copyright (C) 2016 CERN for the benefit of the ACTS project
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ +#include "Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp" + +#include "Acts/Plugins/ExaTrkX/detail/Utils.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "printCudaMemInfo.hpp" + +using namespace torch::indexing; + +namespace { + +class TensorRTLogger : public nvinfer1::ILogger { + std::unique_ptr m_logger; + + public: + TensorRTLogger(Acts::Logging::Level lvl) + : m_logger(Acts::getDefaultLogger("TensorRT", lvl)) {} + + void log(Severity severity, const char *msg) noexcept override { + const auto &logger = *m_logger; + switch (severity) { + case Severity::kVERBOSE: + ACTS_DEBUG(msg); + break; + case Severity::kINFO: + ACTS_INFO(msg); + break; + case Severity::kWARNING: + ACTS_WARNING(msg); + break; + case Severity::kERROR: + ACTS_ERROR(msg); + break; + case Severity::kINTERNAL_ERROR: + ACTS_FATAL(msg); + break; + } + } +}; + +} // namespace + +namespace Acts { + +TensorRTEdgeClassifier::TensorRTEdgeClassifier( + const Config &cfg, std::unique_ptr _logger) + : m_logger(std::move(_logger)), + m_cfg(cfg), + m_trtLogger(std::make_unique(m_logger->level())) { + auto status = initLibNvInferPlugins(m_trtLogger.get(), ""); + assert(status); + + std::size_t fsize = + std::filesystem::file_size(std::filesystem::path(m_cfg.modelPath)); + std::vector engineData(fsize); + + ACTS_DEBUG("Load '" << m_cfg.modelPath << "' with size " << fsize); + + std::ifstream engineFile(m_cfg.modelPath); + engineFile.read(engineData.data(), fsize); + + m_runtime.reset(nvinfer1::createInferRuntime(*m_trtLogger)); + + m_engine.reset(m_runtime->deserializeCudaEngine(engineData.data(), fsize)); + + m_context.reset(m_engine->createExecutionContext()); +} + +TensorRTEdgeClassifier::~TensorRTEdgeClassifier() {} + +auto milliseconds = [](const auto &a, const auto &b) { + return std::chrono::duration(b - a).count(); +}; + +struct TimePrinter { + const char *name; + decltype(std::chrono::high_resolution_clock::now()) t0, t1; + TimePrinter(const char *n) : name(n) { + t0 = std::chrono::high_resolution_clock::now(); + } + ~TimePrinter() { + std::cout << name << ": " << milliseconds(t0, t1) << std::endl; + } +}; + +#if 0 +#define TIME_BEGIN(name) TimePrinter printer##name(#name); +#define TIME_END(name) \ + printer##name.t1 = std::chrono::high_resolution_clock::now(); +#else +#define TIME_BEGIN(name) /*nothing*/ +#define TIME_END(name) /*ǹothing*/ +#endif + +std::tuple +TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, + std::any inEdgeIndex, + std::any inEdgeFeatures, + const ExecutionContext &execContext) { + decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4, t5; + t0 = std::chrono::high_resolution_clock::now(); + + c10::cuda::CUDAStreamGuard(execContext.stream.value()); + + auto nodeFeatures = + std::any_cast(inNodeFeatures).to(torch::kCUDA); + + auto edgeIndex = std::any_cast(inEdgeIndex).to(torch::kCUDA); + ACTS_DEBUG("edgeIndex: " << detail::TensorDetails{edgeIndex}); + + auto edgeFeatures = + std::any_cast(inEdgeFeatures).to(torch::kCUDA); + ACTS_DEBUG("edgeFeatures: " << detail::TensorDetails{edgeFeatures}); + + t1 = std::chrono::high_resolution_clock::now(); + + m_context->setInputShape( + "x", nvinfer1::Dims2{nodeFeatures.size(0), nodeFeatures.size(1)}); + m_context->setTensorAddress("x", nodeFeatures.data_ptr()); + + m_context->setInputShape( + "edge_index", nvinfer1::Dims2{edgeIndex.size(0), edgeIndex.size(1)}); + m_context->setTensorAddress("edge_index", edgeIndex.data_ptr()); + + m_context->setInputShape( + "edge_attr", 
nvinfer1::Dims2{edgeFeatures.size(0), edgeFeatures.size(1)}); + m_context->setTensorAddress("edge_attr", edgeFeatures.data_ptr()); + + void *outputMem{nullptr}; + std::size_t outputSize = edgeIndex.size(1) * sizeof(float); + cudaMalloc(&outputMem, outputSize); + m_context->setTensorAddress("output", outputMem); + + t2 = std::chrono::high_resolution_clock::now(); + + { + auto stream = execContext.stream.value().stream(); + auto status = m_context->enqueueV3(stream); + cudaStreamSynchronize(stream); + ACTS_VERBOSE("TensorRT output status: " << std::boolalpha << status); + } + + t3 = std::chrono::high_resolution_clock::now(); + + auto scores = torch::from_blob( + outputMem, edgeIndex.size(1), 1, [](void *ptr) { cudaFree(ptr); }, + torch::TensorOptions().device(torch::kCUDA).dtype(torch::kFloat32)); + + scores.sigmoid_(); + + ACTS_VERBOSE("Size after classifier: " << scores.size(0)); + ACTS_VERBOSE("Slice of classified output:\n" + << scores.slice(/*dim=*/0, /*start=*/0, /*end=*/9)); + printCudaMemInfo(logger()); + + torch::Tensor mask = scores > m_cfg.cut; + torch::Tensor edgesAfterCut = edgeIndex.index({Slice(), mask}); + + scores = scores.masked_select(mask); + ACTS_VERBOSE("Size after score cut: " << edgesAfterCut.size(1)); + printCudaMemInfo(logger()); + + t4 = std::chrono::high_resolution_clock::now(); + + ACTS_DEBUG("Time anycast: " << milliseconds(t0, t1)); + ACTS_DEBUG("Time alloc, set shape " << milliseconds(t1, t2)); + ACTS_DEBUG("Time inference: " << milliseconds(t2, t3)); + ACTS_DEBUG("Time sigmoid and cut: " << milliseconds(t3, t4)); + + return {nodeFeatures, edgesAfterCut, edgeFeatures, scores}; +} + +} // namespace Acts From 6d2b0a6cbfbe18abf26fac9ba5597cb2302dcffa Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Thu, 9 Jan 2025 17:31:26 +0100 Subject: [PATCH 02/19] update --- .gitlab-ci.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 25aff340111..a82fc9b06f7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -186,6 +186,40 @@ test_exatrkx_python: - pytest -rFsv -k torch --collect-only - pytest -rFsv -k gpu-torch # For now only test torch GPU pipeline +build_gnn_tensorrt: + stage: build + image: nvcr.io/nvidia/tensorrt:24.12-py3 + variables: + DEPENDENCY_URL: https://acts.web.cern.ch/ACTS/ci/ubuntu-24.04/deps.$DEPENDENCY_TAG.tar.zst + + cache: + key: ccache-${CI_JOB_NAME}-${CI_COMMIT_REF_SLUG}-${CCACHE_KEY_SUFFIX} + fallback_keys: + - ccache-${CI_JOB_NAME}-${CI_DEFAULT_BRANCH}-${CCACHE_KEY_SUFFIX} + when: always + paths: + - ${CCACHE_DIR} + + tags: + - docker-gpu-nvidia + + script: + - apt-get update -y + - git clone $CLONE_URL src + - cd src + - git checkout $HEAD_SHA + - source CI/dependencies.sh + - cd .. 
+ - mkdir build + - > + cmake -B build -S src + -DACTS_BUILD_PLUGIN_EXATRKX=ON + -DACTS_EXATRKX_ENABLE_TENSORRT=ON + -DPython_EXECUTABLE=$(which python3) + -DCMAKE_CUDA_ARCHITECTURES="75;86" + + + build_linux_ubuntu: stage: build image: ghcr.io/acts-project/ubuntu2404:63 From fadbfd3fa443cd633c54638da753f192b3488871 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Wed, 15 Jan 2025 15:12:52 +0100 Subject: [PATCH 03/19] update gitlab --- .gitlab-ci.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e7d91151ba1..3125e7f6f47 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -205,6 +205,39 @@ build_gnn_tensorrt: script: - apt-get update -y + - apt-get upgrade -y + - > + apt-get install -y + build-essential + curl + git + freeglut3-dev + libexpat-dev + libftgl-dev + libgl2ps-dev + libglew-dev + libgsl-dev + liblz4-dev + liblzma-dev + libpcre3-dev + libx11-dev + libxext-dev + libxft-dev + libxpm-dev + libxerces-c-dev + libzstd-dev + zstd + ninja-build + python3 + python3-dev + python3-pip + rsync + unzip + zlib1g-dev + ccache + libsqlite3-dev + time + - apt-get clean -y - git clone $CLONE_URL src - cd src - git checkout $HEAD_SHA From 963d14fdf27c7c5f9647054366959a9de1f43f64 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Wed, 15 Jan 2025 15:24:43 +0100 Subject: [PATCH 04/19] update gitlab --- .gitlab-ci.yml | 58 +++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3125e7f6f47..9feddf0f199 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -208,35 +208,35 @@ build_gnn_tensorrt: - apt-get upgrade -y - > apt-get install -y - build-essential - curl - git - freeglut3-dev - libexpat-dev - libftgl-dev - libgl2ps-dev - libglew-dev - libgsl-dev - liblz4-dev - liblzma-dev - libpcre3-dev - libx11-dev - libxext-dev - libxft-dev - libxpm-dev - libxerces-c-dev - libzstd-dev - zstd - ninja-build - python3 - python3-dev - python3-pip - rsync - unzip - zlib1g-dev - ccache - libsqlite3-dev - time + build-essential + curl + git + freeglut3-dev + libexpat-dev + libftgl-dev + libgl2ps-dev + libglew-dev + libgsl-dev + liblz4-dev + liblzma-dev + libpcre3-dev + libx11-dev + libxext-dev + libxft-dev + libxpm-dev + libxerces-c-dev + libzstd-dev + zstd + ninja-build + python3 + python3-dev + python3-pip + rsync + unzip + zlib1g-dev + ccache + libsqlite3-dev + time - apt-get clean -y - git clone $CLONE_URL src - cd src From 09ce4b2cea46c2734407dbbca90cb5386a08f3d3 Mon Sep 17 00:00:00 2001 From: Benjamin Huth <37871400+benjaminhuth@users.noreply.github.com> Date: Thu, 16 Jan 2025 10:12:12 +0100 Subject: [PATCH 05/19] Update .gitlab-ci.yml --- .gitlab-ci.yml | 40 ++++------------------------------------ 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9feddf0f199..acfef69013e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -188,7 +188,7 @@ test_exatrkx_python: build_gnn_tensorrt: stage: build - image: nvcr.io/nvidia/tensorrt:24.12-py3 + image: ghcr.io/acts-project/ubuntu2404_tensorrt:sha-b4f481f@sha256:8887aa00ad4394a53b4ca54968121d8893d537e5daf50805f1dd2030caef78ce variables: DEPENDENCY_URL: https://acts.web.cern.ch/ACTS/ci/ubuntu-24.04/deps.$DEPENDENCY_TAG.tar.zst @@ -204,40 +204,6 @@ build_gnn_tensorrt: - docker-gpu-nvidia script: - - apt-get update -y - - apt-get upgrade -y - - > - apt-get install -y - build-essential - curl - git - freeglut3-dev - libexpat-dev - libftgl-dev - 
libgl2ps-dev - libglew-dev - libgsl-dev - liblz4-dev - liblzma-dev - libpcre3-dev - libx11-dev - libxext-dev - libxft-dev - libxpm-dev - libxerces-c-dev - libzstd-dev - zstd - ninja-build - python3 - python3-dev - python3-pip - rsync - unzip - zlib1g-dev - ccache - libsqlite3-dev - time - - apt-get clean -y - git clone $CLONE_URL src - cd src - git checkout $HEAD_SHA @@ -250,7 +216,9 @@ build_gnn_tensorrt: -DACTS_EXATRKX_ENABLE_TENSORRT=ON -DPython_EXECUTABLE=$(which python3) -DCMAKE_CUDA_ARCHITECTURES="75;86" - + - ccache -z + - cmake --build build -- -j6 + - ccache -s build_linux_ubuntu: From 45ffd7b1f606f75062b3b690f2e58c1db702a93d Mon Sep 17 00:00:00 2001 From: Benjamin Huth <37871400+benjaminhuth@users.noreply.github.com> Date: Thu, 16 Jan 2025 10:26:00 +0100 Subject: [PATCH 06/19] Update .gitlab-ci.yml --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index acfef69013e..e4ce64877cc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -213,6 +213,7 @@ build_gnn_tensorrt: - > cmake -B build -S src -DACTS_BUILD_PLUGIN_EXATRKX=ON + -DACTS_EXATRKX_ENABLE_TORCH=OFF -DACTS_EXATRKX_ENABLE_TENSORRT=ON -DPython_EXECUTABLE=$(which python3) -DCMAKE_CUDA_ARCHITECTURES="75;86" From b983ba9df356ea94b8f3956e40373c84bdaebded Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Thu, 16 Jan 2025 11:15:00 +0100 Subject: [PATCH 07/19] udpate --- .gitlab-ci.yml | 1 + CMakeLists.txt | 7 ------- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e4ce64877cc..83f897b7181 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -214,6 +214,7 @@ build_gnn_tensorrt: cmake -B build -S src -DACTS_BUILD_PLUGIN_EXATRKX=ON -DACTS_EXATRKX_ENABLE_TORCH=OFF + -DACTS_EXATRKX_ENABLE_CUDA=ON -DACTS_EXATRKX_ENABLE_TENSORRT=ON -DPython_EXECUTABLE=$(which python3) -DCMAKE_CUDA_ARCHITECTURES="75;86" diff --git a/CMakeLists.txt b/CMakeLists.txt index a4fd002e871..72ce639c766 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -425,13 +425,6 @@ if(ACTS_BUILD_PLUGIN_EXATRKX) else() message(STATUS "Build Exa.TrkX plugin for CPU only") endif() - if(NOT (ACTS_EXATRKX_ENABLE_ONNX OR ACTS_EXATRKX_ENABLE_TORCH)) - message( - FATAL_ERROR - "When building the Exa.TrkX plugin, at least one of ACTS_EXATRKX_ENABLE_ONNX \ - and ACTS_EXATRKX_ENABLE_TORCHSCRIPT must be enabled." 
- ) - endif() if(ACTS_EXATRKX_ENABLE_TORCH) find_package(TorchScatter REQUIRED) endif() From 574aa9ae9a3b8b308c5ea16b6d12bf386f24f475 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Thu, 16 Jan 2025 13:01:06 +0100 Subject: [PATCH 08/19] fix --- Examples/Python/src/ExaTrkXTrackFinding.cpp | 23 ++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/Examples/Python/src/ExaTrkXTrackFinding.cpp b/Examples/Python/src/ExaTrkXTrackFinding.cpp index 3c950365278..66eee018f8b 100644 --- a/Examples/Python/src/ExaTrkXTrackFinding.cpp +++ b/Examples/Python/src/ExaTrkXTrackFinding.cpp @@ -100,6 +100,17 @@ void addExaTrkXTrackFinding(Context &ctx) { ACTS_PYTHON_MEMBER(useEdgeFeatures); ACTS_PYTHON_STRUCT_END(); } + { + using Alg = Acts::BoostTrackBuilding; + + auto alg = py::class_>( + mex, "BoostTrackBuilding") + .def(py::init([](Logging::Level lvl) { + return std::make_shared( + getDefaultLogger("EdgeClassifier", lvl)); + }), + py::arg("level")); + } #endif #ifdef ACTS_EXATRKX_WITH_TENSORRT @@ -127,18 +138,6 @@ void addExaTrkXTrackFinding(Context &ctx) { ACTS_PYTHON_STRUCT_END(); } #endif - { - using Alg = Acts::BoostTrackBuilding; - - auto alg = py::class_>( - mex, "BoostTrackBuilding") - .def(py::init([](Logging::Level lvl) { - return std::make_shared( - getDefaultLogger("EdgeClassifier", lvl)); - }), - py::arg("level")); - } -#endif #ifdef ACTS_EXATRKX_ONNX_BACKEND { From d70d26fc0232383613c528759bae62a07aee6017 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Thu, 16 Jan 2025 13:04:40 +0100 Subject: [PATCH 09/19] add cmake file --- cmake/FindTensorRT.cmake | 182 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 cmake/FindTensorRT.cmake diff --git a/cmake/FindTensorRT.cmake b/cmake/FindTensorRT.cmake new file mode 100644 index 00000000000..42994b7a3f2 --- /dev/null +++ b/cmake/FindTensorRT.cmake @@ -0,0 +1,182 @@ +# ~~~ +# Copyright 2021 Olivier Le Doeuff +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# This module defines the following variables: +# +# - TensorRT_FOUND: A boolean specifying whether or not TensorRT was found. +# - TensorRT_VERSION: The exact version of TensorRT found +# - TensorRT_VERSION_MAJOR: The major version of TensorRT. +# - TensorRT_VERSION_MINOR: The minor version of TensorRT. +# - TensorRT_VERSION_PATCH: The patch version of TensorRT. +# - TensorRT_VERSION_TWEAK: The tweak version of TensorRT. 
+# - TensorRT_INCLUDE_DIRS: The path to TensorRT ``include`` folder containing the header files required to compile a project linking against TensorRT. +# - TensorRT_LIBRARY_DIRS: The path to TensorRT library directory that contains libraries. +# +# This module create following targets: +# - trt::nvinfer +# - trt::nvinfer_plugin +# - trt::nvonnxparser +# - trt::nvparsers +# This script was inspired from https://github.com/NicolasIRAGNE/CMakeScripts +# This script was inspired from https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake +# +# Hints +# ^^^^^ +# A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look. +# ~~~ + +if(NOT TensorRT_FIND_COMPONENTS) + set(TensorRT_FIND_COMPONENTS nvinfer nvinfer_plugin nvonnxparser nvparsers) +endif() +set(TensorRT_LIBRARIES) + +# find the include directory of TensorRT +find_path( + TensorRT_INCLUDE_DIR + NAMES NvInfer.h + PATHS ${TensorRT_ROOT} + ENV TensorRT_ROOT + PATH_SUFFIXES include +) + +string(FIND ${TensorRT_INCLUDE_DIR} "NOTFOUND" _include_dir_notfound) +if(NOT _include_dir_notfound EQUAL -1) + if(TensorRT_FIND_REQUIRED) + message( + FATAL_ERROR + "Fail to find TensorRT, please set TensorRT_ROOT. Include path not found." + ) + endif() + return() +endif() +set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) + +# Extract version of tensorrt +if(EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") + file( + STRINGS + "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" + TensorRT_MAJOR + REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$" + ) + file( + STRINGS + "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" + TensorRT_MINOR + REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$" + ) + file( + STRINGS + "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" + TensorRT_PATCH + REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$" + ) + file( + STRINGS + "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" + TensorRT_TWEAK + REGEX "^#define NV_TENSORRT_BUILD [0-9]+.*$" + ) + + string( + REGEX REPLACE + "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" + "\\1" + TensorRT_VERSION_MAJOR + "${TensorRT_MAJOR}" + ) + string( + REGEX REPLACE + "^#define NV_TENSORRT_MINOR ([0-9]+).*$" + "\\1" + TensorRT_VERSION_MINOR + "${TensorRT_MINOR}" + ) + string( + REGEX REPLACE + "^#define NV_TENSORRT_PATCH ([0-9]+).*$" + "\\1" + TensorRT_VERSION_PATCH + "${TensorRT_PATCH}" + ) + string( + REGEX REPLACE + "^#define NV_TENSORRT_BUILD ([0-9]+).*$" + "\\1" + TensorRT_VERSION_TWEAK + "${TensorRT_TWEAK}" + ) + set(TensorRT_VERSION + "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}.${TensorRT_VERSION_TWEAK}" + ) +endif() + +function(_find_trt_component component) + # Find library for component (ie nvinfer, nvparsers, etc...) 
+ find_library( + TensorRT_${component}_LIBRARY + NAMES ${component} + PATHS ${TensorRT_ROOT} ${TENSORRT_LIBRARY_DIR} + ENV TensorRT_ROOT + ) + + string(FIND ${TensorRT_${component}_LIBRARY} "NOTFOUND" _library_not_found) + + if(NOT TensorRT_LIBRARY_DIR) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIR + "${_path}" + CACHE INTERNAL + "TensorRT_LIBRARY_DIR" + ) + endif() + + if(NOT TensorRT_LIBRARY_DIRS) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIRS + "${_path}" + CACHE INTERNAL + "TensorRT_LIBRARY_DIRS" + ) + endif() + + # Library found, and doesn't already exists + if(_library_not_found EQUAL -1 AND NOT TARGET trt::${component}) + set(TensorRT_${component}_FOUND + TRUE + CACHE INTERNAL + "Found ${component}" + ) + + # Create a target + add_library(trt::${component} IMPORTED INTERFACE) + target_include_directories( + trt::${component} + SYSTEM + INTERFACE "${TensorRT_INCLUDE_DIRS}" + ) + target_link_libraries( + trt::${component} + INTERFACE "${TensorRT_${component}_LIBRARY}" + ) + set(TensorRT_LIBRARIES + ${TensorRT_LIBRARIES} + ${TensorRT_${component}_LIBRARY} + ) + endif() +endfunction() + +# Find each components +foreach(component IN LISTS TensorRT_FIND_COMPONENTS) + _find_trt_component(${component}) +endforeach() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + TensorRT + HANDLE_COMPONENTS + VERSION_VAR TensorRT_VERSION + REQUIRED_VARS TensorRT_INCLUDE_DIR +) From 0d184243944f80f769b12725b2a608e6e73b4b60 Mon Sep 17 00:00:00 2001 From: Benjamin Huth <37871400+benjaminhuth@users.noreply.github.com> Date: Thu, 16 Jan 2025 15:19:38 +0100 Subject: [PATCH 10/19] Update .gitlab-ci.yml --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 83f897b7181..82b804d358e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -188,7 +188,7 @@ test_exatrkx_python: build_gnn_tensorrt: stage: build - image: ghcr.io/acts-project/ubuntu2404_tensorrt:sha-b4f481f@sha256:8887aa00ad4394a53b4ca54968121d8893d537e5daf50805f1dd2030caef78ce + image: ghcr.io/acts-project/ubuntu2404_tensorrt:74 variables: DEPENDENCY_URL: https://acts.web.cern.ch/ACTS/ci/ubuntu-24.04/deps.$DEPENDENCY_TAG.tar.zst From b6337bb8ca205de4813006abb50279ccc6b9c97e Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Tue, 21 Jan 2025 10:02:53 +0100 Subject: [PATCH 11/19] update --- .../ExaTrkX/src/TensorRTEdgeClassifier.cpp | 102 ++++++++++-------- 1 file changed, 60 insertions(+), 42 deletions(-) diff --git a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp index ef5d058d54c..fdd46ad6c1d 100644 --- a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp +++ b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp @@ -8,6 +8,7 @@ #include "Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp" +#include "Acts/Plugins/ExaTrkX/detail/CudaUtils.cuh" #include "Acts/Plugins/ExaTrkX/detail/Utils.hpp" #include @@ -65,7 +66,9 @@ TensorRTEdgeClassifier::TensorRTEdgeClassifier( m_cfg(cfg), m_trtLogger(std::make_unique(m_logger->level())) { auto status = initLibNvInferPlugins(m_trtLogger.get(), ""); - assert(status); + if (!status) { + throw std::runtime_error("Failed to initialize TensorRT plugins"); + } std::size_t fsize = std::filesystem::file_size(std::filesystem::path(m_cfg.modelPath)); @@ -74,47 +77,45 @@ TensorRTEdgeClassifier::TensorRTEdgeClassifier( ACTS_DEBUG("Load '" << m_cfg.modelPath << "' with size " << fsize); 
std::ifstream engineFile(m_cfg.modelPath); - engineFile.read(engineData.data(), fsize); + if (!engineFile) { + throw std::runtime_error("Failed to open engine file"); + } else if (!engineFile.read(engineData.data(), fsize)) { + throw std::runtime_error("Failed to read engine file"); + } m_runtime.reset(nvinfer1::createInferRuntime(*m_trtLogger)); + if (!m_runtime) { + throw std::runtime_error("Failed to create TensorRT runtime"); + } m_engine.reset(m_runtime->deserializeCudaEngine(engineData.data(), fsize)); + if (!m_engine) { + throw std::runtime_error("Failed to deserialize CUDA engine"); + } - m_context.reset(m_engine->createExecutionContext()); + for (auto i = 0ul; i < m_cfg.numExecutionContexts; ++i) { + ACTS_DEBUG("Create execution context " << i); + m_contexts.emplace_back(m_engine->createExecutionContext()); + if (!m_contexts.back()) { + throw std::runtime_error("Failed to create execution context"); + } + } + + std::size_t freeMem, totalMem; + cudaMemGetInfo(&freeMem, &totalMem); + ACTS_DEBUG("Used CUDA memory after TensorRT initialization: " + << (totalMem - freeMem) * 1e-9 << " / " << totalMem * 1e-9 + << " GB"); } TensorRTEdgeClassifier::~TensorRTEdgeClassifier() {} -auto milliseconds = [](const auto &a, const auto &b) { - return std::chrono::duration(b - a).count(); -}; - -struct TimePrinter { - const char *name; - decltype(std::chrono::high_resolution_clock::now()) t0, t1; - TimePrinter(const char *n) : name(n) { - t0 = std::chrono::high_resolution_clock::now(); - } - ~TimePrinter() { - std::cout << name << ": " << milliseconds(t0, t1) << std::endl; - } -}; - -#if 0 -#define TIME_BEGIN(name) TimePrinter printer##name(#name); -#define TIME_END(name) \ - printer##name.t1 = std::chrono::high_resolution_clock::now(); -#else -#define TIME_BEGIN(name) /*nothing*/ -#define TIME_END(name) /*ǹothing*/ -#endif - std::tuple TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, std::any inEdgeIndex, std::any inEdgeFeatures, const ExecutionContext &execContext) { - decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4, t5; + decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4; t0 = std::chrono::high_resolution_clock::now(); c10::cuda::CUDAStreamGuard(execContext.stream.value()); @@ -131,34 +132,51 @@ TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, t1 = std::chrono::high_resolution_clock::now(); - m_context->setInputShape( + // get a context from the list of contexts + std::unique_ptr context; + while (true) { + std::lock_guard lock(m_contextMutex); + if (!m_contexts.empty()) { + context = std::move(m_contexts.back()); + m_contexts.pop_back(); + break; + } + } + assert(context); + + context->setInputShape( "x", nvinfer1::Dims2{nodeFeatures.size(0), nodeFeatures.size(1)}); - m_context->setTensorAddress("x", nodeFeatures.data_ptr()); + context->setTensorAddress("x", nodeFeatures.data_ptr()); - m_context->setInputShape( - "edge_index", nvinfer1::Dims2{edgeIndex.size(0), edgeIndex.size(1)}); - m_context->setTensorAddress("edge_index", edgeIndex.data_ptr()); + context->setInputShape("edge_index", + nvinfer1::Dims2{edgeIndex.size(0), edgeIndex.size(1)}); + context->setTensorAddress("edge_index", edgeIndex.data_ptr()); - m_context->setInputShape( + context->setInputShape( "edge_attr", nvinfer1::Dims2{edgeFeatures.size(0), edgeFeatures.size(1)}); - m_context->setTensorAddress("edge_attr", edgeFeatures.data_ptr()); + context->setTensorAddress("edge_attr", edgeFeatures.data_ptr()); void *outputMem{nullptr}; std::size_t outputSize = 
edgeIndex.size(1) * sizeof(float); - cudaMalloc(&outputMem, outputSize); - m_context->setTensorAddress("output", outputMem); + ACTS_CUDA_CHECK(cudaMalloc(&outputMem, outputSize)); + context->setTensorAddress("output", outputMem); t2 = std::chrono::high_resolution_clock::now(); - { - auto stream = execContext.stream.value().stream(); - auto status = m_context->enqueueV3(stream); - cudaStreamSynchronize(stream); - ACTS_VERBOSE("TensorRT output status: " << std::boolalpha << status); + auto stream = execContext.stream.value().stream(); + auto status = context->enqueueV3(stream); + if (!status) { + throw std::runtime_error("Failed to execute TensorRT model"); } + ACTS_CUDA_CHECK(cudaStreamSynchronize(stream)); t3 = std::chrono::high_resolution_clock::now(); + { + std::lock_guard lock(m_contextMutex); + m_contexts.push_back(std::move(context)); + } + auto scores = torch::from_blob( outputMem, edgeIndex.size(1), 1, [](void *ptr) { cudaFree(ptr); }, torch::TensorOptions().device(torch::kCUDA).dtype(torch::kFloat32)); From bc06d42817afc6e4fa61a7131654f019462d7946 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Tue, 21 Jan 2025 11:01:22 +0100 Subject: [PATCH 12/19] update --- .../include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp index 1eb24f5ade8..c74ed23a8fd 100644 --- a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp +++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp @@ -33,6 +33,8 @@ class TensorRTEdgeClassifier final : public Acts::EdgeClassificationBase { int deviceID = 0; bool useEdgeFeatures = false; bool doSigmoid = true; + + std::size_t numExecutionContexts = 1; }; TensorRTEdgeClassifier(const Config &cfg, @@ -55,7 +57,9 @@ class TensorRTEdgeClassifier final : public Acts::EdgeClassificationBase { std::unique_ptr m_runtime; std::unique_ptr m_engine; std::unique_ptr m_trtLogger; - std::unique_ptr m_context; + + mutable std::mutex m_contextMutex; + mutable std::vector> m_contexts; }; } // namespace Acts From 91d986428a1abb50520a6fda2da47d36b1acaf13 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Tue, 21 Jan 2025 11:18:36 +0100 Subject: [PATCH 13/19] update --- Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp index fdd46ad6c1d..402744bc028 100644 --- a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp +++ b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp @@ -197,6 +197,9 @@ TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, t4 = std::chrono::high_resolution_clock::now(); + auto milliseconds = [](const auto &a, const auto &b) { + return std::chrono::duration(b - a).count(); + }; ACTS_DEBUG("Time anycast: " << milliseconds(t0, t1)); ACTS_DEBUG("Time alloc, set shape " << milliseconds(t1, t2)); ACTS_DEBUG("Time inference: " << milliseconds(t2, t3)); From 57a9187d16c765ed853e61b20e71485fb2adb509 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Tue, 21 Jan 2025 14:41:06 +0100 Subject: [PATCH 14/19] udpate --- .../ExaTrkX/TensorRTEdgeClassifier.hpp | 3 +-- .../ExaTrkX/src/TensorRTEdgeClassifier.cpp | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp 
b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp index c74ed23a8fd..09e7e46c8f9 100644 --- a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp +++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp @@ -12,6 +12,7 @@ #include "Acts/Utilities/Logger.hpp" #include +#include #include @@ -31,8 +32,6 @@ class TensorRTEdgeClassifier final : public Acts::EdgeClassificationBase { std::vector selectedFeatures = {}; float cut = 0.21; int deviceID = 0; - bool useEdgeFeatures = false; - bool doSigmoid = true; std::size_t numExecutionContexts = 1; }; diff --git a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp index 402744bc028..64f390442fa 100644 --- a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp +++ b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp @@ -115,34 +115,35 @@ TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, std::any inEdgeIndex, std::any inEdgeFeatures, const ExecutionContext &execContext) { + assert(execContext.device.is_cuda()); decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4; t0 = std::chrono::high_resolution_clock::now(); c10::cuda::CUDAStreamGuard(execContext.stream.value()); auto nodeFeatures = - std::any_cast(inNodeFeatures).to(torch::kCUDA); + std::any_cast(inNodeFeatures).to(execContext.device); - auto edgeIndex = std::any_cast(inEdgeIndex).to(torch::kCUDA); + auto edgeIndex = + std::any_cast(inEdgeIndex).to(execContext.device); ACTS_DEBUG("edgeIndex: " << detail::TensorDetails{edgeIndex}); auto edgeFeatures = - std::any_cast(inEdgeFeatures).to(torch::kCUDA); + std::any_cast(inEdgeFeatures).to(execContext.device); ACTS_DEBUG("edgeFeatures: " << detail::TensorDetails{edgeFeatures}); t1 = std::chrono::high_resolution_clock::now(); // get a context from the list of contexts std::unique_ptr context; - while (true) { + while (context == nullptr) { std::lock_guard lock(m_contextMutex); if (!m_contexts.empty()) { context = std::move(m_contexts.back()); m_contexts.pop_back(); - break; } } - assert(context); + assert(context != nullptr); context->setInputShape( "x", nvinfer1::Dims2{nodeFeatures.size(0), nodeFeatures.size(1)}); @@ -156,10 +157,10 @@ TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, "edge_attr", nvinfer1::Dims2{edgeFeatures.size(0), edgeFeatures.size(1)}); context->setTensorAddress("edge_attr", edgeFeatures.data_ptr()); - void *outputMem{nullptr}; - std::size_t outputSize = edgeIndex.size(1) * sizeof(float); - ACTS_CUDA_CHECK(cudaMalloc(&outputMem, outputSize)); - context->setTensorAddress("output", outputMem); + auto scores = torch::empty( + edgeIndex.size(1), + torch::TensorOptions().device(torch::kCUDA).dtype(torch::kFloat32)); + context->setTensorAddress("output", scores.data_ptr()); t2 = std::chrono::high_resolution_clock::now(); From a36cab646e4ace710297fa1713059b2842b15f1f Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Tue, 21 Jan 2025 14:42:51 +0100 Subject: [PATCH 15/19] update --- .../include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp index 09e7e46c8f9..dd3ac90625f 100644 --- a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp +++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp @@ -30,8 +30,7 @@ class TensorRTEdgeClassifier final : public 
Acts::EdgeClassificationBase { struct Config { std::string modelPath; std::vector selectedFeatures = {}; - float cut = 0.21; - int deviceID = 0; + float cut = 0.5; std::size_t numExecutionContexts = 1; }; From 724354412f235cc08d0b418ad6d9331dcb111b8d Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Tue, 21 Jan 2025 16:15:06 +0100 Subject: [PATCH 16/19] update --- Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp index 64f390442fa..5fd0903afcf 100644 --- a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp +++ b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp @@ -178,10 +178,6 @@ TensorRTEdgeClassifier::operator()(std::any inNodeFeatures, m_contexts.push_back(std::move(context)); } - auto scores = torch::from_blob( - outputMem, edgeIndex.size(1), 1, [](void *ptr) { cudaFree(ptr); }, - torch::TensorOptions().device(torch::kCUDA).dtype(torch::kFloat32)); - scores.sigmoid_(); ACTS_VERBOSE("Size after classifier: " << scores.size(0)); From c0e6dc98d98244887d618b17e996bff503431927 Mon Sep 17 00:00:00 2001 From: Benjamin Huth <37871400+benjaminhuth@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:11:41 +0100 Subject: [PATCH 17/19] Apply suggestions from code review Co-authored-by: Andreas Stefl --- .../include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp index dd3ac90625f..abf08f0c62c 100644 --- a/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp +++ b/Plugins/ExaTrkX/include/Acts/Plugins/ExaTrkX/TensorRTEdgeClassifier.hpp @@ -25,11 +25,11 @@ class IExecutionContext; namespace Acts { -class TensorRTEdgeClassifier final : public Acts::EdgeClassificationBase { +class TensorRTEdgeClassifier final : public EdgeClassificationBase { public: struct Config { std::string modelPath; - std::vector selectedFeatures = {}; + std::vector selectedFeatures; float cut = 0.5; std::size_t numExecutionContexts = 1; From ae40e863f0b5e6b8b2a93e462075c6d2eb2e2c84 Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Wed, 29 Jan 2025 11:16:42 +0100 Subject: [PATCH 18/19] update --- .gitlab-ci.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 94bac79414f..9629dce9ff5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -193,12 +193,7 @@ build_gnn_tensorrt: DEPENDENCY_URL: https://acts.web.cern.ch/ACTS/ci/ubuntu-24.04/deps.$DEPENDENCY_TAG.tar.zst cache: - key: ccache-${CI_JOB_NAME}-${CI_COMMIT_REF_SLUG}-${CCACHE_KEY_SUFFIX} - fallback_keys: - - ccache-${CI_JOB_NAME}-${CI_DEFAULT_BRANCH}-${CCACHE_KEY_SUFFIX} - when: always - paths: - - ${CCACHE_DIR} + - !reference [.ccache_base, cache] tags: - docker-gpu-nvidia From c2fd77323593fd752ab5f439e83d95ddcdc3f5cd Mon Sep 17 00:00:00 2001 From: Benjamin Huth Date: Wed, 29 Jan 2025 11:19:25 +0100 Subject: [PATCH 19/19] update --- Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp index 5fd0903afcf..4303fe046dd 100644 --- a/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp +++ b/Plugins/ExaTrkX/src/TensorRTEdgeClassifier.cpp @@ -101,8 +101,8 @@ 
TensorRTEdgeClassifier::TensorRTEdgeClassifier(
     }
   }
 
-  std::size_t freeMem, totalMem;
-  cudaMemGetInfo(&freeMem, &totalMem);
+  std::size_t freeMem{}, totalMem{};
+  ACTS_CUDA_CHECK(cudaMemGetInfo(&freeMem, &totalMem));
   ACTS_DEBUG("Used CUDA memory after TensorRT initialization: "
              << (totalMem - freeMem) * 1e-9 << " / " << totalMem * 1e-9
              << " GB");
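
Usage sketch for the new binding (illustrative only): the snippet below shows how the TensorRTEdgeClassifier exposed in PATCH 01/19 might be instantiated from Python once ACTS is built with ACTS_BUILD_PLUGIN_EXATRKX=ON and ACTS_EXATRKX_ENABLE_TENSORRT=ON. The engine file name, the feature indices and the log level are assumptions; only the config fields modelPath, selectedFeatures and cut are taken from the diffs above.

# Usage sketch (assumed setup, not part of the patches): construct the new
# TensorRT edge classifier through the Python binding added in PATCH 01/19.
import acts
import acts.examples

cfg = acts.examples.TensorRTEdgeClassifier.Config()
cfg.modelPath = "gnn.engine"      # placeholder path to a serialized TensorRT engine
cfg.selectedFeatures = [0, 1, 2]  # example node-feature columns; depends on the trained model
cfg.cut = 0.5                     # edge-score threshold applied after the sigmoid

edgeClassifier = acts.examples.TensorRTEdgeClassifier(
    config=cfg, level=acts.logging.DEBUG
)

# The classifier would then take the place of TorchEdgeClassifier in the usual
# ExaTrkX pipeline, next to a graph constructor and e.g.
# acts.examples.BoostTrackBuilding(level=acts.logging.INFO).

Inference runs on the CUDA stream passed in via the ExecutionContext, and PATCH 12/19 adds a pool of execution contexts (numExecutionContexts) so several event threads can share one deserialized engine.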