Initial commit

triton-inference-server · Mar 6, 2024 · 521874e · 521874e
1 parent ba616e2
commit 521874e
Show file tree

Hide file tree

Showing 8 changed files with 312 additions and 60 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -148,6 +148,8 @@ configure_file(src/libtriton_python.ldscript libtriton_python.ldscript COPYONLY)
 
 set(
   COMMON_SRCS
+  src/correlation_id.cc
+  src/correlation_id.h
   src/infer_response.cc
   src/infer_response.h
   src/infer_request.cc

diff --git a/src/correlation_id.cc b/src/correlation_id.cc
@@ -0,0 +1,120 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "correlation_id.h"
+
+namespace triton { namespace backend { namespace python {
+
+SequenceId::SequenceId()
+    : sequence_label_(""), sequence_index_(0), id_type_(CorrIdDataType::UINT64)
+{
+}
+
+SequenceId::SequenceId(const std::string& sequence_label)
+    : sequence_label_(sequence_label), sequence_index_(0),
+      id_type_(CorrIdDataType::STRING)
+{
+}
+
+SequenceId::SequenceId(uint64_t sequence_index)
+    : sequence_label_(""), sequence_index_(sequence_index),
+      id_type_(CorrIdDataType::UINT64)
+{
+}
+
+SequenceId::SequenceId(const SequenceId& rhs)
+{
+  sequence_index_ = rhs.sequence_index_;
+  id_type_ = rhs.id_type_;
+  sequence_label_ = rhs.sequence_label_;
+}
+
+// SequenceId::SequenceId(const std::unique_ptr<SequenceId>& rhs_ptr)
+// {
+//   sequence_index_ = rhs_ptr->sequence_index_;
+//   id_type_ = rhs_ptr->id_type_;
+//   sequence_label_ = rhs_ptr->sequence_label_;
+// }
+
+SequenceId&
+SequenceId::operator=(const SequenceId& rhs)
+{
+  sequence_index_ = rhs.sequence_index_;
+  id_type_ = rhs.id_type_;
+  sequence_label_ = rhs.sequence_label_;
+  return *this;
+}
+
+void
+SequenceId::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
+{
+  AllocatedSharedMemory<SequenceIdShm> sequence_id_shm =
+      shm_pool->Construct<SequenceIdShm>();
+  sequence_id_shm_ptr_ = sequence_id_shm.data_.get();
+
+  std::unique_ptr<PbString> sequence_label_shm =
+      PbString::Create(shm_pool, sequence_label_);
+
+  sequence_id_shm_ptr_->sequence_index = sequence_index_;
+  sequence_id_shm_ptr_->sequence_label_shm_handle =
+      sequence_label_shm->ShmHandle();
+  sequence_id_shm_ptr_->id_type = id_type_;
+
+  // Save the references to shared memory.
+  sequence_id_shm_ = std::move(sequence_id_shm);
+  sequence_label_shm_ = std::move(sequence_label_shm);
+  shm_handle_ = sequence_id_shm_.handle_;
+}
+
+std::unique_ptr<SequenceId>
+SequenceId::LoadFromSharedMemory(
+    std::unique_ptr<SharedMemoryManager>& shm_pool,
+    bi::managed_external_buffer::handle_t handle)
+{
+  AllocatedSharedMemory<SequenceIdShm> sequence_id_shm =
+      shm_pool->Load<SequenceIdShm>(handle);
+  SequenceIdShm* sequence_id_shm_ptr = sequence_id_shm.data_.get();
+
+  std::unique_ptr<PbString> sequence_label_shm = PbString::LoadFromSharedMemory(
+      shm_pool, sequence_id_shm_ptr->sequence_label_shm_handle);
+
+  return std::unique_ptr<SequenceId>(
+      new SequenceId(sequence_id_shm, sequence_label_shm));
+}
+
+SequenceId::SequenceId(
+    AllocatedSharedMemory<SequenceIdShm>& sequence_id_shm,
+    std::unique_ptr<PbString>& sequence_label_shm)
+    : sequence_id_shm_(std::move(sequence_id_shm)),
+      sequence_label_shm_(std::move(sequence_label_shm))
+{
+  sequence_id_shm_ptr_ = sequence_id_shm_.data_.get();
+  sequence_label_ = sequence_label_shm_->String();
+  sequence_index_ = sequence_id_shm_ptr_->sequence_index;
+  id_type_ = sequence_id_shm_ptr_->id_type;
+}
+
+}}};  // namespace triton::backend::python
diff --git a/src/correlation_id.h b/src/correlation_id.h
@@ -0,0 +1,99 @@
+// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#include <string>
+
+#include "pb_string.h"
+#include "pb_utils.h"
+
+namespace triton { namespace backend { namespace python {
+
+enum class CorrIdDataType { UINT64, STRING };
+
+struct SequenceIdShm {
+  bi::managed_external_buffer::handle_t sequence_label_shm_handle;
+  uint64_t sequence_index;
+  CorrIdDataType id_type;
+};
+
+class SequenceId {
+ public:
+  SequenceId();
+  SequenceId(const std::string& sequence_label);
+  SequenceId(uint64_t sequence_index);
+  SequenceId(const SequenceId& rhs);
+  // SequenceId(const std::unique_ptr<SequenceId>& rhs_ptr);
+  SequenceId& operator=(const SequenceId& rhs);
+
+  // ~SequenceId(){};
+
+  /// Save SequenceId object to shared memory.
+  /// \param shm_pool Shared memory pool to save the SequenceId object.
+  void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
+
+  /// Create a SequenceId object from shared memory.
+  /// \param shm_pool Shared memory pool
+  /// \param handle Shared memory handle of the SequenceId.
+  /// \return Returns the SequenceId in the specified request_handle
+  /// location.
+  static std::unique_ptr<SequenceId> LoadFromSharedMemory(
+      std::unique_ptr<SharedMemoryManager>& shm_pool,
+      bi::managed_external_buffer::handle_t handle);
+
+  // Functions that help determine exact type of sequence Id
+  CorrIdDataType Type() const { return id_type_; }
+  bool InSequence() const
+  {
+    return ((sequence_label_ != "") || (sequence_index_ != 0));
+  }
+
+  // Get the value of the SequenceId based on the type
+  const std::string& StringValue() const { return sequence_label_; }
+  uint64_t UnsignedIntValue() const { return sequence_index_; }
+
+  bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }
+
+ private:
+  // The private constructor for creating a SequenceId object from shared
+  // memory.
+  SequenceId(
+      AllocatedSharedMemory<SequenceIdShm>& sequence_id_shm,
+      std::unique_ptr<PbString>& sequence_label_shm);
+
+  std::string sequence_label_;
+  uint64_t sequence_index_;
+  CorrIdDataType id_type_;
+
+  // Shared Memory Data Structures
+  AllocatedSharedMemory<SequenceIdShm> sequence_id_shm_;
+  SequenceIdShm* sequence_id_shm_ptr_;
+  bi::managed_external_buffer::handle_t shm_handle_;
+  std::unique_ptr<PbString> sequence_label_shm_;
+};
+
+}}};  // namespace triton::backend::python
diff --git a/src/infer_request.cc b/src/infer_request.cc
@@ -38,7 +38,7 @@
 namespace triton { namespace backend { namespace python {
 
 InferRequest::InferRequest(
-    const std::string& request_id, uint64_t correlation_id,
+    const std::string& request_id, const SequenceId& correlation_id,
     const std::vector<std::shared_ptr<PbTensor>>& inputs,
     const std::set<std::string>& requested_output_names,
     const std::string& model_name, const int64_t model_version,
@@ -97,7 +97,7 @@ InferRequest::RequestId()
   return request_id_;
 }
 
-uint64_t
+SequenceId&
 InferRequest::CorrelationId()
 {
   return correlation_id_;
@@ -196,14 +196,13 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       sizeof(InferRequestShm) +
       (RequestedOutputNames().size() *
        sizeof(bi::managed_external_buffer::handle_t)) +
-      (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)) +
-      PbString::ShmStructSize(ModelName()) +
-      PbString::ShmStructSize(RequestId()) +
-      PbString::ShmStructSize(Parameters()));
+      (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)));
 
   infer_request_shm_ptr_ =
       reinterpret_cast<InferRequestShm*>(infer_request_shm.data_.get());
-  infer_request_shm_ptr_->correlation_id = CorrelationId();
+  correlation_id_.SaveToSharedMemory(shm_pool);
+  infer_request_shm_ptr_->correlation_id_shm_handle =
+      correlation_id_.ShmHandle();
   infer_request_shm_ptr_->input_count = Inputs().size();
   infer_request_shm_ptr_->model_version = model_version_;
   infer_request_shm_ptr_->requested_output_count =
@@ -246,30 +245,17 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
     i++;
   }
 
-  size_t model_name_offset =
-      sizeof(InferRequestShm) +
-      (RequestedOutputNames().size() *
-       sizeof(bi::managed_external_buffer::handle_t)) +
-      (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t));
-
-  std::unique_ptr<PbString> model_name_shm = PbString::Create(
-      ModelName(),
-      reinterpret_cast<char*>(infer_request_shm_ptr_) + model_name_offset,
-      infer_request_shm.handle_ + model_name_offset);
-
-  size_t request_id_offset =
-      model_name_offset + PbString::ShmStructSize(ModelName());
-  std::unique_ptr<PbString> request_id_shm = PbString::Create(
-      RequestId(),
-      reinterpret_cast<char*>(infer_request_shm_ptr_) + request_id_offset,
-      infer_request_shm.handle_ + request_id_offset);
-
-  size_t parameters_offset =
-      request_id_offset + PbString::ShmStructSize(RequestId());
-  std::unique_ptr<PbString> parameters_shm = PbString::Create(
-      Parameters(),
-      reinterpret_cast<char*>(infer_request_shm_ptr_) + parameters_offset,
-      infer_request_shm.handle_ + parameters_offset);
+  std::unique_ptr<PbString> model_name_shm =
+      PbString::Create(shm_pool, ModelName());
+
+  std::unique_ptr<PbString> request_id_shm =
+      PbString::Create(shm_pool, RequestId());
+
+  std::unique_ptr<PbString> parameters_shm =
+      PbString::Create(shm_pool, Parameters());
+  infer_request_shm_ptr_->model_name_shm_handle = model_name_shm->ShmHandle();
+  infer_request_shm_ptr_->request_id_shm_handle = request_id_shm->ShmHandle();
+  infer_request_shm_ptr_->parameters_shm_handle = parameters_shm->ShmHandle();
 
   // Save the references to shared memory.
   infer_request_shm_ = std::move(infer_request_shm);
@@ -321,34 +307,27 @@ InferRequest::LoadFromSharedMemory(
     input_tensors.emplace_back(std::move(input_tensor));
   }
 
-  size_t model_name_offset =
-      sizeof(InferRequestShm) +
-      (requested_output_count * sizeof(bi::managed_external_buffer::handle_t)) +
-      (infer_request_shm_ptr->input_count *
-       sizeof(bi::managed_external_buffer::handle_t));
+  std::unique_ptr<SequenceId> correlation_id_shm =
+      SequenceId::LoadFromSharedMemory(
+          shm_pool, infer_request_shm_ptr->correlation_id_shm_handle);
 
   std::unique_ptr<PbString> model_name_shm = PbString::LoadFromSharedMemory(
-      request_handle + model_name_offset,
-      reinterpret_cast<char*>(infer_request_shm_ptr) + model_name_offset);
-
-  size_t request_id_offset = model_name_offset + model_name_shm->Size();
+      shm_pool, infer_request_shm_ptr->model_name_shm_handle);
   std::unique_ptr<PbString> request_id_shm = PbString::LoadFromSharedMemory(
-      request_handle + request_id_offset,
-      reinterpret_cast<char*>(infer_request_shm_ptr) + request_id_offset);
-
-  size_t parameters_offset = request_id_offset + request_id_shm->Size();
+      shm_pool, infer_request_shm_ptr->request_id_shm_handle);
   std::unique_ptr<PbString> parameters_shm = PbString::LoadFromSharedMemory(
-      request_handle + request_id_offset,
-      reinterpret_cast<char*>(infer_request_shm_ptr) + parameters_offset);
+      shm_pool, infer_request_shm_ptr->parameters_shm_handle);
 
   return std::unique_ptr<InferRequest>(new InferRequest(
-      infer_request_shm, request_id_shm, requested_output_names_shm,
-      model_name_shm, input_tensors, parameters_shm));
+      infer_request_shm, request_id_shm, correlation_id_shm,
+      requested_output_names_shm, model_name_shm, input_tensors,
+      parameters_shm));
 }
 
 InferRequest::InferRequest(
     AllocatedSharedMemory<char>& infer_request_shm,
     std::unique_ptr<PbString>& request_id_shm,
+    std::unique_ptr<SequenceId>& correlation_id_shm,
     std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
     std::unique_ptr<PbString>& model_name_shm,
     std::vector<std::shared_ptr<PbTensor>>& input_tensors,
@@ -387,7 +366,6 @@ InferRequest::InferRequest(
   model_name_ = model_name_shm_->String();
   flags_ = infer_request_shm_ptr_->flags;
   model_version_ = infer_request_shm_ptr_->model_version;
-  correlation_id_ = infer_request_shm_ptr_->correlation_id;
   request_address_ = infer_request_shm_ptr_->address;
   response_factory_address_ = infer_request_shm_ptr_->response_factory_address;
   is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
@@ -396,6 +374,16 @@ InferRequest::InferRequest(
   trace_ = infer_request_shm_ptr_->trace;
   request_release_flags_ = infer_request_shm_ptr_->request_release_flags;
 
+  if (correlation_id_shm->Type() == CorrIdDataType::STRING) {
+    correlation_id_ = SequenceId(correlation_id_shm->StringValue());
+    std::cerr << "=== InferRequest::InferRequest: correlation_id_ = "
+              << correlation_id_.StringValue() << std::endl;
+  } else {
+    correlation_id_ = SequenceId(correlation_id_shm->UnsignedIntValue());
+    std::cerr << "=== InferRequest::InferRequest: correlation_id_ = "
+              << correlation_id_.UnsignedIntValue() << std::endl;
+  }
+
 #ifdef TRITON_PB_STUB
   pb_cancel_ =
       std::make_shared<PbCancel>(response_factory_address_, request_address_);