From 4ee0fce531eb6e0aa793d895101846115518ea5c Mon Sep 17 00:00:00 2001 From: Kris Hung Date: Thu, 11 Jan 2024 11:57:35 -0800 Subject: [PATCH 1/4] Clean up response iterator map properly (#335) --- src/pb_stub.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pb_stub.cc b/src/pb_stub.cc index d1f8f6fd..a7d39852 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -372,6 +372,14 @@ Stub::RunCommand() } break; case PYTHONSTUB_CommandType::PYTHONSTUB_FinalizeRequest: ipc_message->Command() = PYTHONSTUB_FinalizeResponse; + // Clean up response_iterator_map_ before sending the message back to + // the parent process to make sure that the clean up message can be + // processed before the message queue is destroyed. + { + std::lock_guard lock(response_iterator_map_mu_); + std::unordered_map>().swap( + response_iterator_map_); + } SendIPCMessage(ipc_message); return true; // Terminate the stub process case PYTHONSTUB_CommandType::PYTHONSTUB_LoadGPUBuffers: @@ -1049,7 +1057,7 @@ Stub::SendCleanupId( const PYTHONSTUB_CommandType& command_type) { void* id = utils_msg_payload->utils_message_ptr; - { + if (command_type == PYTHONSTUB_BLSDecoupledInferPayloadCleanup) { std::lock_guard lock(response_iterator_map_mu_); response_iterator_map_.erase(id); } From 980a5bb00c3b136e9464d7667718f462e083afb9 Mon Sep 17 00:00:00 2001 From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com> Date: Thu, 11 Jan 2024 12:02:19 -0800 Subject: [PATCH 2/4] Bumping min required cxx standard to 17 (#332) --- CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6fae6a00..2b47df1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,9 @@ cmake_minimum_required(VERSION 3.17) project(tritonpythonbackend LANGUAGES C CXX) +# Use C++17 standard as Triton's minimum required. 
+set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.") + # # Options # @@ -231,14 +234,14 @@ add_library( TritonPythonBackend::triton-python-backend ALIAS triton-python-backend ) -target_compile_features(triton-python-backend PRIVATE cxx_std_11) +target_compile_features(triton-python-backend PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD}) target_compile_options( triton-python-backend PRIVATE $<$,$,$>: -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror> ) -target_compile_features(triton-python-backend-stub PRIVATE cxx_std_11) +target_compile_features(triton-python-backend-stub PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD}) target_compile_options( triton-python-backend-stub PRIVATE $<$,$,$>: From 9d67dc39d2e42658c650525eccc836b2e991627b Mon Sep 17 00:00:00 2001 From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com> Date: Thu, 18 Jan 2024 11:21:50 -0800 Subject: [PATCH 3/4] Changing cuda cxx flag (#338) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b47df1d..2be987cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,7 +119,7 @@ set(boostorg_INCLUDE_DIRS "${CMAKE_BINARY_DIR}/boost/") if(${TRITON_ENABLE_GPU}) find_package(CUDAToolkit REQUIRED) message(STATUS "Using CUDA ${CUDA_VERSION}") - set(CUDA_NVCC_FLAGS -std=c++11) + set(CUDA_NVCC_FLAGS -std=c++${TRITON_MIN_CXX_STANDARD}) elseif() message(WARNING "TRITON_ENABLE_GPU is OFF, GPU Tensor support will be disabled") endif() # TRITON_ENABLE_GPU From 37d29025f8da7c81cf9b6d88f5ff4d44e389a732 Mon Sep 17 00:00:00 2001 From: Jacky <18255193+kthui@users.noreply.github.com> Date: Fri, 19 Jan 2024 15:33:58 -0800 Subject: [PATCH 4/4] Improve decoupled shm handling (#337) * [DO NOT MERGE] Add shm trace util * [DO NOT MERGE] Expand shm leak util naming to ipc load * Revert "[DO NOT MERGE] Expand shm leak util naming to ipc load" This reverts commit 
68906f2dd32fa70fe247321391ce26967d04ec5a. * Revert "[DO NOT MERGE] Add shm trace util" This reverts commit 37824ce137b009e0ef13b46f440e1f94c865180e. * Fix decoupled shared memory leak --- src/python_be.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/python_be.cc b/src/python_be.cc index 3c9dd19d..a8dfab07 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -1328,6 +1328,7 @@ ModelInstanceState::ProcessRequestsDecoupled( AllocatedSharedMemory response_batch = Stub()->ShmPool()->Load(received_message_->Args()); + received_message_.reset(); uint64_t compute_end_ns = 0; SET_TIMESTAMP(compute_end_ns);