From ad1f7c1c8c45544c45e1e4cf81e4cdcc505a3506 Mon Sep 17 00:00:00 2001
From: Olga Andreeva <124622579+oandreeva-nv@users.noreply.github.com>
Date: Fri, 15 Mar 2024 16:11:41 -0700
Subject: [PATCH] Exposing trace context to python backend (#346) (#347)

Exposing trace context to python backend
---
 CMakeLists.txt          |   2 +
 src/infer_request.cc    |  17 +++++--
 src/infer_request.h     |  24 ++--------
 src/infer_trace.cc      | 101 ++++++++++++++++++++++++++++++++++++++++
 src/infer_trace.h       |  90 +++++++++++++++++++++++++++++++++++
 src/pb_stub.cc          |  11 ++++-
 src/python_be.cc        |  14 +++++-
 src/request_executor.cc |   6 ++-
 8 files changed, 235 insertions(+), 30 deletions(-)
 create mode 100644 src/infer_trace.cc
 create mode 100644 src/infer_trace.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dacd0f9c..46f4bfe4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -153,6 +153,8 @@ set(
     src/infer_response.h
     src/infer_request.cc
     src/infer_request.h
+    src/infer_trace.cc
+    src/infer_trace.h
     src/message_queue.h
     src/ipc_message.cc
     src/ipc_message.h
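What this enables from a model's perspective: `request.trace()` now returns an `InferenceTrace` object whose new `get_context()` binding (added in `src/pb_stub.cc` below) returns the propagated trace context, or `None` when tracing is disabled. A minimal `model.py` sketch; the tensor names and the echo logic are illustrative, not part of this patch:

    import triton_python_backend_utils as pb_utils


    class TritonPythonModel:
        def execute(self, requests):
            responses = []
            for request in requests:
                # New in this patch: the trace context propagated by Triton
                # core. Returns None if tracing is disabled or no context is
                # associated with this request.
                context = request.trace().get_context()
                if context is not None:
                    pb_utils.Logger.log_info(f"Trace context: {context}")
                # Echo INPUT0 back as OUTPUT0 (illustrative only).
                input0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
                output0 = pb_utils.Tensor("OUTPUT0", input0.as_numpy())
                responses.append(pb_utils.InferenceResponse([output0]))
            return responses
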
diff --git a/src/infer_request.cc b/src/infer_request.cc
index f18900d0..aa34447e 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -170,7 +170,7 @@ InferRequest::GetPreferredMemory()
 }
 
 InferenceTrace&
-InferRequest::Trace()
+InferRequest::GetTrace()
 {
   return trace_;
 }
@@ -214,7 +214,6 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
   infer_request_shm_ptr_->is_decoupled = is_decoupled_;
   infer_request_shm_ptr_->timeout = timeout_;
   infer_request_shm_ptr_->preferred_memory = preferred_memory_;
-  infer_request_shm_ptr_->trace = trace_;
   infer_request_shm_ptr_->request_release_flags = request_release_flags_;
 
   output_names_handle_shm_ptr_ =
@@ -271,6 +270,9 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       reinterpret_cast<char*>(infer_request_shm_ptr_) + parameters_offset,
       infer_request_shm.handle_ + parameters_offset);
 
+  trace_.SaveToSharedMemory(shm_pool);
+  infer_request_shm_ptr_->trace_shm_handle = trace_.ShmHandle();
+
   // Save the references to shared memory.
   infer_request_shm_ = std::move(infer_request_shm);
   request_id_shm_ = std::move(request_id_shm);
@@ -327,6 +329,10 @@ InferRequest::LoadFromSharedMemory(
       (infer_request_shm_ptr->input_count *
        sizeof(bi::managed_external_buffer::handle_t));
 
+  std::unique_ptr<InferenceTrace> infer_trace_shm =
+      InferenceTrace::LoadFromSharedMemory(
+          shm_pool, infer_request_shm_ptr->trace_shm_handle);
+
   std::unique_ptr<PbString> model_name_shm = PbString::LoadFromSharedMemory(
       request_handle + model_name_offset,
       reinterpret_cast<char*>(infer_request_shm_ptr) + model_name_offset);
@@ -343,7 +349,7 @@ InferRequest::LoadFromSharedMemory(
 
   return std::unique_ptr<InferRequest>(new InferRequest(
       infer_request_shm, request_id_shm, requested_output_names_shm,
-      model_name_shm, input_tensors, parameters_shm));
+      model_name_shm, input_tensors, parameters_shm, infer_trace_shm));
 }
 
 InferRequest::InferRequest(
@@ -352,7 +358,8 @@ InferRequest::InferRequest(
     std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
     std::unique_ptr<PbString>& model_name_shm,
     std::vector<std::shared_ptr<PbTensor>>& input_tensors,
-    std::unique_ptr<PbString>& parameters_shm)
+    std::unique_ptr<PbString>& parameters_shm,
+    std::unique_ptr<InferenceTrace>& infer_trace_shm)
     : infer_request_shm_(std::move(infer_request_shm)),
       request_id_shm_(std::move(request_id_shm)),
       requested_output_names_shm_(std::move(requested_output_names_shm)),
@@ -393,7 +400,7 @@ InferRequest::InferRequest(
   is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
   timeout_ = infer_request_shm_ptr_->timeout;
   preferred_memory_ = infer_request_shm_ptr_->preferred_memory;
-  trace_ = infer_request_shm_ptr_->trace;
+  trace_ = InferenceTrace(infer_trace_shm);
   request_release_flags_ = infer_request_shm_ptr_->request_release_flags;
 
 #ifdef TRITON_PB_STUB
diff --git a/src/infer_request.h b/src/infer_request.h
index ba586535..9bf9dfdb 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -30,6 +30,7 @@
 #include <future>
 
 #include "infer_response.h"
+#include "infer_trace.h"
 #include "pb_preferred_memory.h"
 #include "pb_tensor.h"
 
@@ -42,22 +43,6 @@ namespace triton { namespace backend { namespace python {
 
 class Stub;
 
-//
-// Inference Trace
-//
-struct InferenceTrace {
-#ifndef TRITON_PB_STUB
-  TRITONSERVER_InferenceTrace* triton_trace_;
-  InferenceTrace(TRITONSERVER_InferenceTrace* triton_trace)
-      : triton_trace_(triton_trace)
-  {
-  }
-#else
-  void* triton_trace_;
-#endif
-  InferenceTrace() : triton_trace_(nullptr) {}
-};
-
 //
 // Inference Request
 //
@@ -72,7 +57,7 @@ struct InferRequestShm {
   bool is_decoupled;
   uint64_t timeout;
   PreferredMemory preferred_memory;
-  InferenceTrace trace;
+  bi::managed_external_buffer::handle_t trace_shm_handle;
   uint32_t request_release_flags;
 };
 
@@ -104,7 +89,7 @@ class InferRequest {
   bool IsDecoupled();
   void SetIsDecoupled(const bool is_decoupled);
   PreferredMemory& GetPreferredMemory();
-  InferenceTrace& Trace();
+  InferenceTrace& GetTrace();
   uint32_t ReleaseFlags();
   void SetReleaseFlags(const uint32_t& flags);
 
@@ -144,7 +129,8 @@ class InferRequest {
       std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
       std::unique_ptr<PbString>& model_name_shm,
      std::vector<std::shared_ptr<PbTensor>>& input_tensors,
-      std::unique_ptr<PbString>& parameters_shm);
+      std::unique_ptr<PbString>& parameters_shm,
+      std::unique_ptr<InferenceTrace>& infer_trace_shm);
 
   std::string request_id_;
   uint64_t correlation_id_;
diff --git a/src/infer_trace.cc b/src/infer_trace.cc
new file mode 100644
index 00000000..50645dcc
--- /dev/null
+++ b/src/infer_trace.cc
@@ -0,0 +1,101 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "infer_trace.h"
+
+namespace triton { namespace backend { namespace python {
+
+InferenceTrace::InferenceTrace(const InferenceTrace& rhs)
+{
+  triton_trace_ = rhs.triton_trace_;
+  trace_context_ = rhs.trace_context_;
+}
+
+InferenceTrace&
+InferenceTrace::operator=(const InferenceTrace& rhs)
+{
+  triton_trace_ = rhs.triton_trace_;
+  trace_context_ = rhs.trace_context_;
+  return *this;
+}
+
+InferenceTrace::InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm)
+{
+  triton_trace_ = trace_shm->triton_trace_;
+  trace_context_ = trace_shm->trace_context_;
+}
+
+void
+InferenceTrace::SaveToSharedMemory(
+    std::unique_ptr<SharedMemoryManager>& shm_pool)
+{
+  AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
+      shm_pool->Construct<InferenceTraceShm>();
+  infer_trace_shm_ptr_ = infer_trace_shm.data_.get();
+
+  infer_trace_shm_ptr_->triton_trace = triton_trace_;
+
+  std::unique_ptr<PbString> trace_context_shm =
+      PbString::Create(shm_pool, trace_context_);
+
+  infer_trace_shm_ptr_->trace_context_shm_handle =
+      trace_context_shm->ShmHandle();
+
+  // Save the references to shared memory.
+  trace_context_shm_ = std::move(trace_context_shm);
+  infer_trace_shm_ = std::move(infer_trace_shm);
+  shm_handle_ = infer_trace_shm_.handle_;
+}
+
+std::unique_ptr<InferenceTrace>
+InferenceTrace::LoadFromSharedMemory(
+    std::unique_ptr<SharedMemoryManager>& shm_pool,
+    bi::managed_external_buffer::handle_t handle)
+{
+  AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
+      shm_pool->Load<InferenceTraceShm>(handle);
+  InferenceTraceShm* infer_trace_shm_ptr = infer_trace_shm.data_.get();
+
+  std::unique_ptr<PbString> trace_context_shm = PbString::LoadFromSharedMemory(
+      shm_pool, infer_trace_shm_ptr->trace_context_shm_handle);
+
+  return std::unique_ptr<InferenceTrace>(
+      new InferenceTrace(infer_trace_shm, trace_context_shm));
+}
+
+InferenceTrace::InferenceTrace(
+    AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
+    std::unique_ptr<PbString>& trace_context_shm)
+    : infer_trace_shm_(std::move(infer_trace_shm)),
+      trace_context_shm_(std::move(trace_context_shm))
+{
+  infer_trace_shm_ptr_ = infer_trace_shm_.data_.get();
+  shm_handle_ = infer_trace_shm_.handle_;
+  triton_trace_ = infer_trace_shm_ptr_->triton_trace;
+  trace_context_ = trace_context_shm_->String();
+}
+
+}}};  // namespace triton::backend::python
diff --git a/src/infer_trace.h b/src/infer_trace.h
new file mode 100644
index 00000000..aac9137f
--- /dev/null
+++ b/src/infer_trace.h
@@ -0,0 +1,90 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#include <string>
+
+#include "pb_string.h"
+#include "pb_utils.h"
+
+namespace triton { namespace backend { namespace python {
+
+struct InferenceTraceShm {
+  bi::managed_external_buffer::handle_t trace_context_shm_handle;
+  // The address of the 'TRITONSERVER_InferenceTrace' object.
+  void* triton_trace;
+};
+
+//
+// Inference Trace
+//
+class InferenceTrace {
+ public:
+  InferenceTrace(void* triton_trace, const std::string& ctxt)
+      : triton_trace_(triton_trace), trace_context_(ctxt)
+  {
+  }
+  InferenceTrace() : triton_trace_(nullptr), trace_context_("") {}
+  InferenceTrace(const InferenceTrace& rhs);
+  InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm);
+  InferenceTrace& operator=(const InferenceTrace& rhs);
+
+  /// Save InferenceTrace object to shared memory.
+  /// \param shm_pool Shared memory pool to save the InferenceTrace object.
+  void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
+
+  /// Create an InferenceTrace object from shared memory.
+  /// \param shm_pool Shared memory pool
+  /// \param handle Shared memory handle of the InferenceTrace.
+  /// \return Returns the InferenceTrace in the specified handle
+  /// location.
+  static std::unique_ptr<InferenceTrace> LoadFromSharedMemory(
+      std::unique_ptr<SharedMemoryManager>& shm_pool,
+      bi::managed_external_buffer::handle_t handle);
+
+  void* TritonTrace() { return triton_trace_; }
+  const std::string& Context() const { return trace_context_; }
+
+  bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }
+
+ private:
+  // The private constructor for creating an InferenceTrace object from shared
+  // memory.
+  InferenceTrace(
+      AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
+      std::unique_ptr<PbString>& trace_context_shm);
+
+  void* triton_trace_;
+  std::string trace_context_;
+
+  // Shared Memory Data Structures
+  AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm_;
+  InferenceTraceShm* infer_trace_shm_ptr_;
+  bi::managed_external_buffer::handle_t shm_handle_;
+  std::unique_ptr<PbString> trace_context_shm_;
+};
+
+}}};  // namespace triton::backend::python
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index 26003f71..695b02f5 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -1610,7 +1610,14 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .export_values();
 
   py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
-      module, "InferenceTrace");
+      module, "InferenceTrace")
+      .def("get_context", [](InferenceTrace& self) -> py::object {
+        auto context = self.Context();
+        if (context != "") {
+          return py::str(context);
+        }
+        return py::none();
+      });
 
   py::class_<InferRequest, std::shared_ptr<InferRequest>>(
       module, "InferenceRequest")
@@ -1674,7 +1681,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       .def("set_flags", &InferRequest::SetFlags)
       .def("timeout", &InferRequest::Timeout)
       .def("parameters", &InferRequest::Parameters)
-      .def("trace", &InferRequest::Trace)
+      .def("trace", &InferRequest::GetTrace)
       .def(
           "exec",
           [](std::shared_ptr<InferRequest>& infer_request,
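A note on the `get_context` binding above: it maps an empty context to `None` rather than `""`, so Python code can distinguish "no trace context" from a real value with a plain `is None` check. When Triton is built with OpenTelemetry tracing, the context is expected to be a JSON string carrying W3C trace-context headers, but that shape is a property of the server's tracing configuration, not something this patch guarantees; the parsing below is an illustrative assumption:

    import json


    def traceparent_from(trace):
        """Return the W3C `traceparent` header from an InferenceTrace, if any.

        Assumes the context, when present, is a JSON object such as
        {"traceparent": "...", "tracestate": "..."}, as produced by an
        OpenTelemetry-enabled Triton build.
        """
        context = trace.get_context()
        if context is None:
            return None  # Tracing disabled or no context for this request.
        return json.loads(context).get("traceparent")
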
diff --git a/src/python_be.cc b/src/python_be.cc
index 0fa318ff..b9ba7302 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -371,14 +371,25 @@ ModelInstanceState::SaveRequestsToSharedMemory(
 
     // Do not return if error in this case, because Triton core
     // will return an error if tracing is disabled (see PYBE PR#295).
+    // For the same reason, we do not log the error message, otherwise
+    // when Triton is compiled without tracing, it'll constantly log
+    // this error.
     TRITONSERVER_InferenceTrace* triton_trace;
     auto err = TRITONBACKEND_RequestTrace(request, &triton_trace);
     if (err != nullptr) {
       triton_trace = nullptr;
       TRITONSERVER_ErrorDelete(err);
     }
+    const char* val = nullptr;
+    if (triton_trace != nullptr) {
+      LOG_IF_ERROR(
+          TRITONSERVER_InferenceTraceContext(triton_trace, &val),
+          "failed to retrieve trace context");
+    }
+    std::string context = (val != nullptr) ? std::string(val) : "";
 
-    InferenceTrace trace = InferenceTrace(triton_trace);
+    InferenceTrace trace =
+        InferenceTrace(reinterpret_cast<void*>(triton_trace), context);
 
     uint64_t request_timeout;
     RETURN_IF_ERROR(TRITONBACKEND_InferenceRequestTimeoutMicroseconds(
@@ -403,7 +414,6 @@ ModelInstanceState::SaveRequestsToSharedMemory(
           reinterpret_cast<intptr_t>(request),
           PreferredMemory(PreferredMemory::kDefault, 0), trace);
     }
-
     RETURN_IF_EXCEPTION(infer_request->SaveToSharedMemory(Stub()->ShmPool()));
     requests_shm[r] = infer_request->ShmHandle();
     pb_infer_requests.emplace_back(std::move(infer_request));
diff --git a/src/request_executor.cc b/src/request_executor.cc
index d78972a5..39a4b9b6 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -367,9 +367,11 @@ RequestExecutor::Infer(
         irequest, InferRequestComplete, nullptr /* request_release_userp */));
 
     TRITONSERVER_InferenceTrace* trace = nullptr;
-    if (infer_request->Trace().triton_trace_ != nullptr) {
+    if (infer_request->GetTrace().TritonTrace() != nullptr) {
       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceTraceSpawnChildTrace(
-          infer_request->Trace().triton_trace_, &trace));
+          reinterpret_cast<TRITONSERVER_InferenceTrace*>(
+              infer_request->GetTrace().TritonTrace()),
+          &trace));
     }
 
     const std::string& param_str = infer_request->Parameters();
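Finally, the `request_executor.cc` hunk keeps child-trace spawning working for BLS: when a BLS request carries the parent's trace, `RequestExecutor::Infer` spawns a child via `TRITONSERVER_InferenceTraceSpawnChildTrace`. A sketch of wiring that up from `model.py`, assuming the `trace` keyword argument that `InferenceRequest` gained alongside the PYBE PR#295 work referenced above; `downstream_model` and the tensor wiring are illustrative:

    import triton_python_backend_utils as pb_utils


    def bls_with_trace(request):
        # Hand the parent request's trace to the BLS request so the backend
        # spawns a child trace for the downstream call.
        infer_request = pb_utils.InferenceRequest(
            model_name="downstream_model",
            inputs=[pb_utils.get_input_tensor_by_name(request, "INPUT0")],
            requested_output_names=["OUTPUT0"],
            trace=request.trace(),
        )
        return infer_request.exec()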