
Commit 333ba38
Merge remote-tracking branch 'origin/main' into fpetrini-py-to-win
fpetrini15 committed Jan 22, 2024
2 parents c60f41b + 37d2902 commit 333ba38
Showing 3 changed files with 16 additions and 4 deletions.
9 changes: 6 additions & 3 deletions CMakeLists.txt
```diff
@@ -28,6 +28,9 @@ cmake_minimum_required(VERSION 3.17)
 
 project(tritonpythonbackend LANGUAGES C CXX)
 
+# Use C++17 standard as Triton's minimum required.
+set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard whose features are requested to build this target.")
+
 #
 # Options
 #
@@ -119,7 +122,7 @@ set(boostorg_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/boost/")
 if(${TRITON_ENABLE_GPU})
   find_package(CUDAToolkit REQUIRED)
   message(STATUS "Using CUDA ${CUDA_VERSION}")
-  set(CUDA_NVCC_FLAGS -std=c++11)
+  set(CUDA_NVCC_FLAGS -std=c++${TRITON_MIN_CXX_STANDARD})
 elseif()
   message(WARNING "TRITON_ENABLE_GPU is OFF, GPU Tensor support will be disabled")
 endif() # TRITON_ENABLE_GPU
@@ -237,15 +240,15 @@ add_library(
   TritonPythonBackend::triton-python-backend ALIAS triton-python-backend
 )
 
-target_compile_features(triton-python-backend PRIVATE cxx_std_11)
+target_compile_features(triton-python-backend PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
 target_compile_options(
   triton-python-backend PRIVATE
   $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
   -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
   $<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
 )
 
-target_compile_features(triton-python-backend-stub PRIVATE cxx_std_11)
+target_compile_features(triton-python-backend-stub PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD})
 target_compile_options(
   triton-python-backend-stub PRIVATE
   $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
```
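For context, the bump to a C++17 minimum presumably reflects that the sources now rely on newer language features. The snippet below is hypothetical (not from this repository) and simply illustrates C++17 constructs that the previous cxx_std_11 setting would reject:

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, int> counts{{"requests", 3}};
  // Structured bindings and if-with-initializer are C++17 features;
  // compiling this translation unit with -std=c++11 fails.
  for (const auto& [key, value] : counts) {
    if (auto it = counts.find(key); it != counts.end()) {
      std::cout << key << " = " << value << '\n';
    }
  }
  return 0;
}
```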
10 changes: 9 additions & 1 deletion src/pb_stub.cc
```diff
@@ -380,6 +380,14 @@ Stub::RunCommand()
     } break;
     case PYTHONSTUB_CommandType::PYTHONSTUB_FinalizeRequest:
       ipc_message->Command() = PYTHONSTUB_FinalizeResponse;
+      // Clean up response_iterator_map_ before sending the message back to
+      // the parent process to make sure that the clean-up message can be
+      // processed before the message queue is destroyed.
+      {
+        std::lock_guard<std::mutex> lock(response_iterator_map_mu_);
+        std::unordered_map<void*, std::shared_ptr<ResponseIterator>>().swap(
+            response_iterator_map_);
+      }
       SendIPCMessage(ipc_message);
       return true;  // Terminate the stub process
     case PYTHONSTUB_CommandType::PYTHONSTUB_LoadGPUBuffers:
@@ -1057,7 +1065,7 @@ Stub::SendCleanupId(
     const PYTHONSTUB_CommandType& command_type)
 {
   void* id = utils_msg_payload->utils_message_ptr;
-  {
+  if (command_type == PYTHONSTUB_BLSDecoupledInferPayloadCleanup) {
     std::lock_guard<std::mutex> lock(response_iterator_map_mu_);
     response_iterator_map_.erase(id);
   }
```
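Taken together, the two hunks above make the stub drop its response iterators deterministically: the whole map is emptied before the finalize response goes out, and per-entry erasure only happens for BLS-decoupled cleanup commands. Below is a standalone sketch of the same idioms, using hypothetical names rather than the backend's actual classes:

```cpp
#include <memory>
#include <mutex>
#include <unordered_map>

// Hypothetical stand-in for the backend's ResponseIterator.
struct Iterator {};

class StubLike {
 public:
  void Track(void* id, std::shared_ptr<Iterator> it) {
    std::lock_guard<std::mutex> lock(mu_);
    map_[id] = std::move(it);
  }

  // Mirrors the finalize path: swapping with a default-constructed map
  // under the lock drops every stored reference at one controlled point,
  // before any shutdown steps the iterators might depend on.
  void ClearBeforeShutdown() {
    std::lock_guard<std::mutex> lock(mu_);
    std::unordered_map<void*, std::shared_ptr<Iterator>>().swap(map_);
  }

  // Mirrors the guarded erase in SendCleanupId: only the BLS-decoupled
  // cleanup command is supposed to touch the map.
  void Cleanup(void* id, bool is_bls_decoupled_cleanup) {
    if (is_bls_decoupled_cleanup) {
      std::lock_guard<std::mutex> lock(mu_);
      map_.erase(id);
    }
  }

 private:
  std::mutex mu_;
  std::unordered_map<void*, std::shared_ptr<Iterator>> map_;
};

int main() {
  StubLike stub;
  auto it = std::make_shared<Iterator>();
  stub.Track(it.get(), it);
  stub.Cleanup(it.get(), /*is_bls_decoupled_cleanup=*/true);
  stub.ClearBeforeShutdown();
}
```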
1 change: 1 addition & 0 deletions src/python_be.cc
```diff
@@ -1325,6 +1325,7 @@ ModelInstanceState::ProcessRequestsDecoupled(
 
   AllocatedSharedMemory<ResponseBatch> response_batch =
       Stub()->ShmPool()->Load<ResponseBatch>(received_message_->Args());
+  received_message_.reset();
 
   uint64_t compute_end_ns = 0;
   SET_TIMESTAMP(compute_end_ns);
```
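The added received_message_.reset() releases the message as soon as its arguments have been consumed, instead of holding it for the rest of the function. A minimal sketch of this early-release pattern, with hypothetical types (not the backend's actual API):

```cpp
#include <cstdio>
#include <memory>

// Hypothetical message type standing in for the backend's IPC message.
struct Message {
  int payload = 42;
  ~Message() { std::puts("message storage released"); }
};

int Consume(std::unique_ptr<Message>& msg) {
  int value = msg->payload;  // copy out everything still needed
  msg.reset();               // release the message storage right away,
                             // not at the end of the caller's scope
  return value;
}

int main() {
  auto msg = std::make_unique<Message>();
  int v = Consume(msg);      // prints "message storage released" here
  std::printf("payload = %d\n", v);
}
```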
