From 4ee0fce531eb6e0aa793d895101846115518ea5c Mon Sep 17 00:00:00 2001
From: Kris Hung <krish@nvidia.com>
Date: Thu, 11 Jan 2024 11:57:35 -0800
Subject: [PATCH 1/4] Clean up response iterator map properly (#335)

---
 src/pb_stub.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index d1f8f6fd..a7d39852 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -372,6 +372,14 @@ Stub::RunCommand()
     } break;
     case PYTHONSTUB_CommandType::PYTHONSTUB_FinalizeRequest:
       ipc_message->Command() = PYTHONSTUB_FinalizeResponse;
+      // Clean up response_iterator_map_ before sending the message back to
+      // the parent process to make sure that the clean up message can be
+      // processed before the message queue is destroyed.
+      {
+        std::lock_guard<std::mutex> lock(response_iterator_map_mu_);
+        std::unordered_map<void*, std::shared_ptr<ResponseIterator>>().swap(
+            response_iterator_map_);
+      }
       SendIPCMessage(ipc_message);
       return true;  // Terminate the stub process
     case PYTHONSTUB_CommandType::PYTHONSTUB_LoadGPUBuffers:
@@ -1049,7 +1057,7 @@ Stub::SendCleanupId(
     const PYTHONSTUB_CommandType& command_type)
 {
   void* id = utils_msg_payload->utils_message_ptr;
-  {
+  if (command_type == PYTHONSTUB_BLSDecoupledInferPayloadCleanup) {
     std::lock_guard<std::mutex> lock(response_iterator_map_mu_);
     response_iterator_map_.erase(id);
   }

From 980a5bb00c3b136e9464d7667718f462e083afb9 Mon Sep 17 00:00:00 2001
From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com>
Date: Thu, 11 Jan 2024 12:02:19 -0800
Subject: [PATCH 2/4] Bumping min required cxx standard to 17 (#332)

---
 CMakeLists.txt | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6fae6a00..2b47df1d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,9 @@ cmake_minimum_required(VERSION 3.17)
 
 project(tritonpythonbackend LANGUAGES C CXX)
 
+# Use C++17 standard as Triton's minimum required.
+set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.")
+
 #
 # Options
 #
@@ -231,14 +234,14 @@ add_library(
   TritonPythonBackend::triton-python-backend ALIAS triton-python-backend
 )
 
-target_compile_features(triton-python-backend PRIVATE cxx_std_11)
+target_compile_features(triton-python-backend PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
 target_compile_options(
   triton-python-backend PRIVATE
   $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
     -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
 )
 
-target_compile_features(triton-python-backend-stub PRIVATE cxx_std_11)
+target_compile_features(triton-python-backend-stub PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
 target_compile_options(
   triton-python-backend-stub PRIVATE
   $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:

From 9d67dc39d2e42658c650525eccc836b2e991627b Mon Sep 17 00:00:00 2001
From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com>
Date: Thu, 18 Jan 2024 11:21:50 -0800
Subject: [PATCH 3/4] Changing cuda cxx flag (#338)

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b47df1d..2be987cd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -119,7 +119,7 @@ set(boostorg_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/boost/")
 if(${TRITON_ENABLE_GPU})
   find_package(CUDAToolkit REQUIRED)
   message(STATUS "Using CUDA ${CUDA_VERSION}")
-  set(CUDA_NVCC_FLAGS -std=c++11)
+  set(CUDA_NVCC_FLAGS -std=c++${TRITON_MIN_CXX_STANDARD})
 elseif()
   message(WARNING "TRITON_ENABLE_GPU is OFF, GPU Tensor support will be disabled")
 endif() # TRITON_ENABLE_GPU

From 37d29025f8da7c81cf9b6d88f5ff4d44e389a732 Mon Sep 17 00:00:00 2001
From: Jacky <18255193+kthui@users.noreply.github.com>
Date: Fri, 19 Jan 2024 15:33:58 -0800
Subject: [PATCH 4/4] Improve decoupled shm handling (#337)

* [DO NOT MERGE] Add shm trace util

* [DO NOT MERGE] Expand shm leak util naming to ipc load

* Revert "[DO NOT MERGE] Expand shm leak util naming to ipc load"

This reverts commit 68906f2dd32fa70fe247321391ce26967d04ec5a.

* Revert "[DO NOT MERGE] Add shm trace util"

This reverts commit 37824ce137b009e0ef13b46f440e1f94c865180e.

* Fix decoupled shared memory leak
---
 src/python_be.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/python_be.cc b/src/python_be.cc
index 3c9dd19d..a8dfab07 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -1328,6 +1328,7 @@ ModelInstanceState::ProcessRequestsDecoupled(
 
   AllocatedSharedMemory<ResponseBatch> response_batch =
       Stub()->ShmPool()->Load<ResponseBatch>(received_message_->Args());
+  received_message_.reset();
 
   uint64_t compute_end_ns = 0;
   SET_TIMESTAMP(compute_end_ns);