Exposing trace context to python backend (#346) (#347)
oandreeva-nv authored Mar 15, 2024
1 parent 0413e46 commit ad1f7c1
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 30 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -153,6 +153,8 @@ set(
src/infer_response.h
src/infer_request.cc
src/infer_request.h
src/infer_trace.cc
src/infer_trace.h
src/message_queue.h
src/ipc_message.cc
src/ipc_message.h
17 changes: 12 additions & 5 deletions src/infer_request.cc
@@ -170,7 +170,7 @@ InferRequest::GetPreferredMemory()
}

InferenceTrace&
InferRequest::Trace()
InferRequest::GetTrace()
{
return trace_;
}
@@ -214,7 +214,6 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
infer_request_shm_ptr_->is_decoupled = is_decoupled_;
infer_request_shm_ptr_->timeout = timeout_;
infer_request_shm_ptr_->preferred_memory = preferred_memory_;
infer_request_shm_ptr_->trace = trace_;
infer_request_shm_ptr_->request_release_flags = request_release_flags_;

output_names_handle_shm_ptr_ =
@@ -271,6 +270,9 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
reinterpret_cast<char*>(infer_request_shm_ptr_) + parameters_offset,
infer_request_shm.handle_ + parameters_offset);

trace_.SaveToSharedMemory(shm_pool);
infer_request_shm_ptr_->trace_shm_handle = trace_.ShmHandle();

// Save the references to shared memory.
infer_request_shm_ = std::move(infer_request_shm);
request_id_shm_ = std::move(request_id_shm);
@@ -327,6 +329,10 @@ InferRequest::LoadFromSharedMemory(
(infer_request_shm_ptr->input_count *
sizeof(bi::managed_external_buffer::handle_t));

std::unique_ptr<InferenceTrace> infer_trace_shm =
InferenceTrace::LoadFromSharedMemory(
shm_pool, infer_request_shm_ptr->trace_shm_handle);

std::unique_ptr<PbString> model_name_shm = PbString::LoadFromSharedMemory(
request_handle + model_name_offset,
reinterpret_cast<char*>(infer_request_shm_ptr) + model_name_offset);
@@ -343,7 +349,7 @@

return std::unique_ptr<InferRequest>(new InferRequest(
infer_request_shm, request_id_shm, requested_output_names_shm,
model_name_shm, input_tensors, parameters_shm));
model_name_shm, input_tensors, parameters_shm, infer_trace_shm));
}

InferRequest::InferRequest(
@@ -352,7 +358,8 @@ InferRequest::InferRequest(
std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
std::unique_ptr<PbString>& model_name_shm,
std::vector<std::shared_ptr<PbTensor>>& input_tensors,
std::unique_ptr<PbString>& parameters_shm)
std::unique_ptr<PbString>& parameters_shm,
std::unique_ptr<InferenceTrace>& infer_trace_shm)
: infer_request_shm_(std::move(infer_request_shm)),
request_id_shm_(std::move(request_id_shm)),
requested_output_names_shm_(std::move(requested_output_names_shm)),
@@ -393,7 +400,7 @@ InferRequest::InferRequest(
is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
timeout_ = infer_request_shm_ptr_->timeout;
preferred_memory_ = infer_request_shm_ptr_->preferred_memory;
trace_ = infer_request_shm_ptr_->trace;
trace_ = InferenceTrace(infer_trace_shm);
request_release_flags_ = infer_request_shm_ptr_->request_release_flags;

#ifdef TRITON_PB_STUB
24 changes: 5 additions & 19 deletions src/infer_request.h
@@ -30,6 +30,7 @@
#include <string>

#include "infer_response.h"
#include "infer_trace.h"
#include "pb_preferred_memory.h"
#include "pb_tensor.h"

@@ -42,22 +43,6 @@ namespace triton { namespace backend { namespace python {

class Stub;

//
// Inference Trace
//
struct InferenceTrace {
#ifndef TRITON_PB_STUB
TRITONSERVER_InferenceTrace* triton_trace_;
InferenceTrace(TRITONSERVER_InferenceTrace* triton_trace)
: triton_trace_(triton_trace)
{
}
#else
void* triton_trace_;
#endif
InferenceTrace() : triton_trace_(nullptr) {}
};

//
// Inference Request
//
@@ -72,7 +57,7 @@ struct InferRequestShm {
bool is_decoupled;
uint64_t timeout;
PreferredMemory preferred_memory;
InferenceTrace trace;
bi::managed_external_buffer::handle_t trace_shm_handle;
uint32_t request_release_flags;
};

@@ -104,7 +89,7 @@
bool IsDecoupled();
void SetIsDecoupled(const bool is_decoupled);
PreferredMemory& GetPreferredMemory();
InferenceTrace& Trace();
InferenceTrace& GetTrace();
uint32_t ReleaseFlags();
void SetReleaseFlags(const uint32_t& flags);

@@ -144,7 +129,8 @@
std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
std::unique_ptr<PbString>& model_name_shm,
std::vector<std::shared_ptr<PbTensor>>& input_tensors,
std::unique_ptr<PbString>& parameters_shm);
std::unique_ptr<PbString>& parameters_shm,
std::unique_ptr<InferenceTrace>& infer_trace_shm);

std::string request_id_;
uint64_t correlation_id_;
101 changes: 101 additions & 0 deletions src/infer_trace.cc
@@ -0,0 +1,101 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "infer_trace.h"

namespace triton { namespace backend { namespace python {

InferenceTrace::InferenceTrace(const InferenceTrace& rhs)
{
triton_trace_ = rhs.triton_trace_;
trace_context_ = rhs.trace_context_;
}

InferenceTrace&
InferenceTrace::operator=(const InferenceTrace& rhs)
{
triton_trace_ = rhs.triton_trace_;
trace_context_ = rhs.trace_context_;
return *this;
}

InferenceTrace::InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm)
{
triton_trace_ = trace_shm->triton_trace_;
trace_context_ = trace_shm->trace_context_;
}

void
InferenceTrace::SaveToSharedMemory(
std::unique_ptr<SharedMemoryManager>& shm_pool)
{
AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
shm_pool->Construct<InferenceTraceShm>();
infer_trace_shm_ptr_ = infer_trace_shm.data_.get();

infer_trace_shm_ptr_->triton_trace = triton_trace_;

std::unique_ptr<PbString> trace_context_shm =
PbString::Create(shm_pool, trace_context_);

infer_trace_shm_ptr_->trace_context_shm_handle =
trace_context_shm->ShmHandle();

// Save the references to shared memory.
trace_context_shm_ = std::move(trace_context_shm);
infer_trace_shm_ = std::move(infer_trace_shm);
shm_handle_ = infer_trace_shm_.handle_;
}

std::unique_ptr<InferenceTrace>
InferenceTrace::LoadFromSharedMemory(
std::unique_ptr<SharedMemoryManager>& shm_pool,
bi::managed_external_buffer::handle_t handle)
{
AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm =
shm_pool->Load<InferenceTraceShm>(handle);
InferenceTraceShm* infer_trace_shm_ptr = infer_trace_shm.data_.get();

std::unique_ptr<PbString> trace_context_shm = PbString::LoadFromSharedMemory(
shm_pool, infer_trace_shm_ptr->trace_context_shm_handle);

return std::unique_ptr<InferenceTrace>(
new InferenceTrace(infer_trace_shm, trace_context_shm));
}

InferenceTrace::InferenceTrace(
AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
std::unique_ptr<PbString>& trace_context_shm)
: infer_trace_shm_(std::move(infer_trace_shm)),
trace_context_shm_(std::move(trace_context_shm))
{
infer_trace_shm_ptr_ = infer_trace_shm_.data_.get();
shm_handle_ = infer_trace_shm_.handle_;
triton_trace_ = infer_trace_shm_ptr_->triton_trace;
trace_context_ = trace_context_shm_->String();
}

}}}; // namespace triton::backend::python
90 changes: 90 additions & 0 deletions src/infer_trace.h
@@ -0,0 +1,90 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#pragma once

#include <string>

#include "pb_string.h"
#include "pb_utils.h"

namespace triton { namespace backend { namespace python {

struct InferenceTraceShm {
bi::managed_external_buffer::handle_t trace_context_shm_handle;
// The address of the 'TRITONSERVER_InferenceTrace' object.
void* triton_trace;
};

//
// Inference Trace
//
class InferenceTrace {
public:
InferenceTrace(void* triton_trace, const std::string& ctxt)
: triton_trace_(triton_trace), trace_context_(ctxt)
{
}
InferenceTrace() : triton_trace_(nullptr), trace_context_("") {}
InferenceTrace(const InferenceTrace& rhs);
InferenceTrace(std::unique_ptr<InferenceTrace>& trace_shm);
InferenceTrace& operator=(const InferenceTrace& rhs);
/// Save InferenceTrace object to shared memory.
/// \param shm_pool Shared memory pool to save the InferenceTrace object.
void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);

/// Create an InferenceTrace object from shared memory.
/// \param shm_pool Shared memory pool
/// \param handle Shared memory handle of the InferenceTrace.
/// \return Returns the InferenceTrace in the specified handle
/// location.
static std::unique_ptr<InferenceTrace> LoadFromSharedMemory(
std::unique_ptr<SharedMemoryManager>& shm_pool,
bi::managed_external_buffer::handle_t handle);

void* TritonTrace() { return triton_trace_; }
const std::string& Context() const { return trace_context_; }

bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }

private:
// The private constructor for creating an InferenceTrace object from shared
// memory.
InferenceTrace(
AllocatedSharedMemory<InferenceTraceShm>& infer_trace_shm,
std::unique_ptr<PbString>& trace_context_shm);

void* triton_trace_;
std::string trace_context_;

// Shared Memory Data Structures
AllocatedSharedMemory<InferenceTraceShm> infer_trace_shm_;
InferenceTraceShm* infer_trace_shm_ptr_;
bi::managed_external_buffer::handle_t shm_handle_;
std::unique_ptr<PbString> trace_context_shm_;
};

}}}; // namespace triton::backend::python
11 changes: 9 additions & 2 deletions src/pb_stub.cc
@@ -1610,7 +1610,14 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
.export_values();

py::class_<InferenceTrace, std::shared_ptr<InferenceTrace>>(
module, "InferenceTrace");
module, "InferenceTrace")
.def("get_context", [](InferenceTrace& self) -> py::object {
auto context = self.Context();
if (context != "") {
return py::str(context);
}
return py::none();
});

py::class_<InferRequest, std::shared_ptr<InferRequest>>(
module, "InferenceRequest")
@@ -1674,7 +1681,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
.def("set_flags", &InferRequest::SetFlags)
.def("timeout", &InferRequest::Timeout)
.def("parameters", &InferRequest::Parameters)
.def("trace", &InferRequest::Trace)
.def("trace", &InferRequest::GetTrace)
.def(
"exec",
[](std::shared_ptr<InferRequest>& infer_request,
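With the get_context binding above, a Python model can read the trace context directly from its requests. Below is a minimal sketch of a model's execute() using the standard python_backend model interface; the input/output names "IN" and "OUT" and the logging call are illustrative, not part of this commit:

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # trace() returns the InferenceTrace object bound above;
            # get_context() yields the context string, or None when
            # tracing is disabled or no context was recorded.
            context = request.trace().get_context()
            if context is not None:
                pb_utils.Logger.log_info(f"trace context: {context}")
            # Echo the input back as the output (model-specific logic
            # would go here instead).
            in_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
            out_tensor = pb_utils.Tensor("OUT", in_tensor.as_numpy())
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[out_tensor]))
        return responses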
14 changes: 12 additions & 2 deletions src/python_be.cc
@@ -371,14 +371,25 @@ ModelInstanceState::SaveRequestsToSharedMemory(

// Do not return if error in this case, because Triton core
// will return an error if tracing is disabled (see PYBE PR#295).
// For the same reason, we do not log the error message, otherwise
// when Triton is compiled without tracing, it'll constantly log
// this error.
TRITONSERVER_InferenceTrace* triton_trace;
auto err = TRITONBACKEND_RequestTrace(request, &triton_trace);
if (err != nullptr) {
triton_trace = nullptr;
TRITONSERVER_ErrorDelete(err);
}
const char* val = nullptr;
if (triton_trace != nullptr) {
LOG_IF_ERROR(
TRITONSERVER_InferenceTraceContext(triton_trace, &val),
"failed to retrieve trace context");
}
std::string context = (val != nullptr) ? std::string(val) : "";

InferenceTrace trace = InferenceTrace(triton_trace);
InferenceTrace trace =
InferenceTrace(reinterpret_cast<void*>(triton_trace), context);

uint64_t request_timeout;
RETURN_IF_ERROR(TRITONBACKEND_InferenceRequestTimeoutMicroseconds(
@@ -403,7 +414,6 @@
reinterpret_cast<intptr_t>(request),
PreferredMemory(PreferredMemory::kDefault, 0), trace);
}

RETURN_IF_EXCEPTION(infer_request->SaveToSharedMemory(Stub()->ShmPool()));
requests_shm[r] = infer_request->ShmHandle();
pb_infer_requests.emplace_back(std::move(infer_request));
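On the server side, the hunk above fetches the context via TRITONSERVER_InferenceTraceContext and forwards it to the stub as an opaque string. One plausible way for a model to consume it, assuming the string is a JSON object of W3C trace-context carrier headers such as traceparent (the format emitted by Triton's OpenTelemetry trace mode); the function and span names below are illustrative:

import json

from opentelemetry import trace
from opentelemetry.propagate import extract


def start_linked_span(context_str, span_name="python_backend_work"):
    # Assumption: context_str is a JSON-encoded carrier dict, e.g.
    # {"traceparent": "00-<trace-id>-<span-id>-01"}; None or "" means
    # tracing was disabled for this request.
    carrier = json.loads(context_str) if context_str else {}
    parent_ctx = extract(carrier)
    tracer = trace.get_tracer("python_backend_model")
    # The returned span is parented to the span Triton core created
    # for this inference request, so backend work shows up in the
    # same distributed trace.
    return tracer.start_span(span_name, context=parent_ctx)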