Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
krishung5 committed Mar 6, 2024
1 parent ba616e2 commit 521874e
Show file tree
Hide file tree
Showing 8 changed files with 312 additions and 60 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ configure_file(src/libtriton_python.ldscript libtriton_python.ldscript COPYONLY)

set(
COMMON_SRCS
src/correlation_id.cc
src/correlation_id.h
src/infer_response.cc
src/infer_response.h
src/infer_request.cc
Expand Down
120 changes: 120 additions & 0 deletions src/correlation_id.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "correlation_id.h"

namespace triton { namespace backend { namespace python {

SequenceId::SequenceId()
: sequence_label_(""), sequence_index_(0), id_type_(CorrIdDataType::UINT64)
{
}

SequenceId::SequenceId(const std::string& sequence_label)
: sequence_label_(sequence_label), sequence_index_(0),
id_type_(CorrIdDataType::STRING)
{
}

SequenceId::SequenceId(uint64_t sequence_index)
: sequence_label_(""), sequence_index_(sequence_index),
id_type_(CorrIdDataType::UINT64)
{
}

SequenceId::SequenceId(const SequenceId& rhs)
{
sequence_index_ = rhs.sequence_index_;
id_type_ = rhs.id_type_;
sequence_label_ = rhs.sequence_label_;
}

// SequenceId::SequenceId(const std::unique_ptr<SequenceId>& rhs_ptr)
// {
// sequence_index_ = rhs_ptr->sequence_index_;
// id_type_ = rhs_ptr->id_type_;
// sequence_label_ = rhs_ptr->sequence_label_;
// }

SequenceId&
SequenceId::operator=(const SequenceId& rhs)
{
sequence_index_ = rhs.sequence_index_;
id_type_ = rhs.id_type_;
sequence_label_ = rhs.sequence_label_;
return *this;
}

void
SequenceId::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
{
AllocatedSharedMemory<SequenceIdShm> sequence_id_shm =
shm_pool->Construct<SequenceIdShm>();
sequence_id_shm_ptr_ = sequence_id_shm.data_.get();

std::unique_ptr<PbString> sequence_label_shm =
PbString::Create(shm_pool, sequence_label_);

sequence_id_shm_ptr_->sequence_index = sequence_index_;
sequence_id_shm_ptr_->sequence_label_shm_handle =
sequence_label_shm->ShmHandle();
sequence_id_shm_ptr_->id_type = id_type_;

// Save the references to shared memory.
sequence_id_shm_ = std::move(sequence_id_shm);
sequence_label_shm_ = std::move(sequence_label_shm);
shm_handle_ = sequence_id_shm_.handle_;
}

std::unique_ptr<SequenceId>
SequenceId::LoadFromSharedMemory(
std::unique_ptr<SharedMemoryManager>& shm_pool,
bi::managed_external_buffer::handle_t handle)
{
AllocatedSharedMemory<SequenceIdShm> sequence_id_shm =
shm_pool->Load<SequenceIdShm>(handle);
SequenceIdShm* sequence_id_shm_ptr = sequence_id_shm.data_.get();

std::unique_ptr<PbString> sequence_label_shm = PbString::LoadFromSharedMemory(
shm_pool, sequence_id_shm_ptr->sequence_label_shm_handle);

return std::unique_ptr<SequenceId>(
new SequenceId(sequence_id_shm, sequence_label_shm));
}

SequenceId::SequenceId(
AllocatedSharedMemory<SequenceIdShm>& sequence_id_shm,
std::unique_ptr<PbString>& sequence_label_shm)
: sequence_id_shm_(std::move(sequence_id_shm)),
sequence_label_shm_(std::move(sequence_label_shm))
{
sequence_id_shm_ptr_ = sequence_id_shm_.data_.get();
sequence_label_ = sequence_label_shm_->String();
sequence_index_ = sequence_id_shm_ptr_->sequence_index;
id_type_ = sequence_id_shm_ptr_->id_type;
}

}}}; // namespace triton::backend::python
99 changes: 99 additions & 0 deletions src/correlation_id.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#pragma once

#include <string>

#include "pb_string.h"
#include "pb_utils.h"

namespace triton { namespace backend { namespace python {

enum class CorrIdDataType { UINT64, STRING };

struct SequenceIdShm {
bi::managed_external_buffer::handle_t sequence_label_shm_handle;
uint64_t sequence_index;
CorrIdDataType id_type;
};

class SequenceId {
public:
SequenceId();
SequenceId(const std::string& sequence_label);
SequenceId(uint64_t sequence_index);
SequenceId(const SequenceId& rhs);
// SequenceId(const std::unique_ptr<SequenceId>& rhs_ptr);
SequenceId& operator=(const SequenceId& rhs);

// ~SequenceId(){};

/// Save SequenceId object to shared memory.
/// \param shm_pool Shared memory pool to save the SequenceId object.
void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);

/// Create a SequenceId object from shared memory.
/// \param shm_pool Shared memory pool
/// \param handle Shared memory handle of the SequenceId.
/// \return Returns the SequenceId in the specified request_handle
/// location.
static std::unique_ptr<SequenceId> LoadFromSharedMemory(
std::unique_ptr<SharedMemoryManager>& shm_pool,
bi::managed_external_buffer::handle_t handle);

// Functions that help determine exact type of sequence Id
CorrIdDataType Type() const { return id_type_; }
bool InSequence() const
{
return ((sequence_label_ != "") || (sequence_index_ != 0));
}

// Get the value of the SequenceId based on the type
const std::string& StringValue() const { return sequence_label_; }
uint64_t UnsignedIntValue() const { return sequence_index_; }

bi::managed_external_buffer::handle_t ShmHandle() { return shm_handle_; }

private:
// The private constructor for creating a SequenceId object from shared
// memory.
SequenceId(
AllocatedSharedMemory<SequenceIdShm>& sequence_id_shm,
std::unique_ptr<PbString>& sequence_label_shm);

std::string sequence_label_;
uint64_t sequence_index_;
CorrIdDataType id_type_;

// Shared Memory Data Structures
AllocatedSharedMemory<SequenceIdShm> sequence_id_shm_;
SequenceIdShm* sequence_id_shm_ptr_;
bi::managed_external_buffer::handle_t shm_handle_;
std::unique_ptr<PbString> sequence_label_shm_;
};

}}}; // namespace triton::backend::python
86 changes: 37 additions & 49 deletions src/infer_request.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
namespace triton { namespace backend { namespace python {

InferRequest::InferRequest(
const std::string& request_id, uint64_t correlation_id,
const std::string& request_id, const SequenceId& correlation_id,
const std::vector<std::shared_ptr<PbTensor>>& inputs,
const std::set<std::string>& requested_output_names,
const std::string& model_name, const int64_t model_version,
Expand Down Expand Up @@ -97,7 +97,7 @@ InferRequest::RequestId()
return request_id_;
}

uint64_t
SequenceId&
InferRequest::CorrelationId()
{
return correlation_id_;
Expand Down Expand Up @@ -196,14 +196,13 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
sizeof(InferRequestShm) +
(RequestedOutputNames().size() *
sizeof(bi::managed_external_buffer::handle_t)) +
(Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)) +
PbString::ShmStructSize(ModelName()) +
PbString::ShmStructSize(RequestId()) +
PbString::ShmStructSize(Parameters()));
(Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)));

infer_request_shm_ptr_ =
reinterpret_cast<InferRequestShm*>(infer_request_shm.data_.get());
infer_request_shm_ptr_->correlation_id = CorrelationId();
correlation_id_.SaveToSharedMemory(shm_pool);
infer_request_shm_ptr_->correlation_id_shm_handle =
correlation_id_.ShmHandle();
infer_request_shm_ptr_->input_count = Inputs().size();
infer_request_shm_ptr_->model_version = model_version_;
infer_request_shm_ptr_->requested_output_count =
Expand Down Expand Up @@ -246,30 +245,17 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
i++;
}

size_t model_name_offset =
sizeof(InferRequestShm) +
(RequestedOutputNames().size() *
sizeof(bi::managed_external_buffer::handle_t)) +
(Inputs().size() * sizeof(bi::managed_external_buffer::handle_t));

std::unique_ptr<PbString> model_name_shm = PbString::Create(
ModelName(),
reinterpret_cast<char*>(infer_request_shm_ptr_) + model_name_offset,
infer_request_shm.handle_ + model_name_offset);

size_t request_id_offset =
model_name_offset + PbString::ShmStructSize(ModelName());
std::unique_ptr<PbString> request_id_shm = PbString::Create(
RequestId(),
reinterpret_cast<char*>(infer_request_shm_ptr_) + request_id_offset,
infer_request_shm.handle_ + request_id_offset);

size_t parameters_offset =
request_id_offset + PbString::ShmStructSize(RequestId());
std::unique_ptr<PbString> parameters_shm = PbString::Create(
Parameters(),
reinterpret_cast<char*>(infer_request_shm_ptr_) + parameters_offset,
infer_request_shm.handle_ + parameters_offset);
std::unique_ptr<PbString> model_name_shm =
PbString::Create(shm_pool, ModelName());

std::unique_ptr<PbString> request_id_shm =
PbString::Create(shm_pool, RequestId());

std::unique_ptr<PbString> parameters_shm =
PbString::Create(shm_pool, Parameters());
infer_request_shm_ptr_->model_name_shm_handle = model_name_shm->ShmHandle();
infer_request_shm_ptr_->request_id_shm_handle = request_id_shm->ShmHandle();
infer_request_shm_ptr_->parameters_shm_handle = parameters_shm->ShmHandle();

// Save the references to shared memory.
infer_request_shm_ = std::move(infer_request_shm);
Expand Down Expand Up @@ -321,34 +307,27 @@ InferRequest::LoadFromSharedMemory(
input_tensors.emplace_back(std::move(input_tensor));
}

size_t model_name_offset =
sizeof(InferRequestShm) +
(requested_output_count * sizeof(bi::managed_external_buffer::handle_t)) +
(infer_request_shm_ptr->input_count *
sizeof(bi::managed_external_buffer::handle_t));
std::unique_ptr<SequenceId> correlation_id_shm =
SequenceId::LoadFromSharedMemory(
shm_pool, infer_request_shm_ptr->correlation_id_shm_handle);

std::unique_ptr<PbString> model_name_shm = PbString::LoadFromSharedMemory(
request_handle + model_name_offset,
reinterpret_cast<char*>(infer_request_shm_ptr) + model_name_offset);

size_t request_id_offset = model_name_offset + model_name_shm->Size();
shm_pool, infer_request_shm_ptr->model_name_shm_handle);
std::unique_ptr<PbString> request_id_shm = PbString::LoadFromSharedMemory(
request_handle + request_id_offset,
reinterpret_cast<char*>(infer_request_shm_ptr) + request_id_offset);

size_t parameters_offset = request_id_offset + request_id_shm->Size();
shm_pool, infer_request_shm_ptr->request_id_shm_handle);
std::unique_ptr<PbString> parameters_shm = PbString::LoadFromSharedMemory(
request_handle + request_id_offset,
reinterpret_cast<char*>(infer_request_shm_ptr) + parameters_offset);
shm_pool, infer_request_shm_ptr->parameters_shm_handle);

return std::unique_ptr<InferRequest>(new InferRequest(
infer_request_shm, request_id_shm, requested_output_names_shm,
model_name_shm, input_tensors, parameters_shm));
infer_request_shm, request_id_shm, correlation_id_shm,
requested_output_names_shm, model_name_shm, input_tensors,
parameters_shm));
}

InferRequest::InferRequest(
AllocatedSharedMemory<char>& infer_request_shm,
std::unique_ptr<PbString>& request_id_shm,
std::unique_ptr<SequenceId>& correlation_id_shm,
std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
std::unique_ptr<PbString>& model_name_shm,
std::vector<std::shared_ptr<PbTensor>>& input_tensors,
Expand Down Expand Up @@ -387,7 +366,6 @@ InferRequest::InferRequest(
model_name_ = model_name_shm_->String();
flags_ = infer_request_shm_ptr_->flags;
model_version_ = infer_request_shm_ptr_->model_version;
correlation_id_ = infer_request_shm_ptr_->correlation_id;
request_address_ = infer_request_shm_ptr_->address;
response_factory_address_ = infer_request_shm_ptr_->response_factory_address;
is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
Expand All @@ -396,6 +374,16 @@ InferRequest::InferRequest(
trace_ = infer_request_shm_ptr_->trace;
request_release_flags_ = infer_request_shm_ptr_->request_release_flags;

if (correlation_id_shm->Type() == CorrIdDataType::STRING) {
correlation_id_ = SequenceId(correlation_id_shm->StringValue());
std::cerr << "=== InferRequest::InferRequest: correlation_id_ = "
<< correlation_id_.StringValue() << std::endl;
} else {
correlation_id_ = SequenceId(correlation_id_shm->UnsignedIntValue());
std::cerr << "=== InferRequest::InferRequest: correlation_id_ = "
<< correlation_id_.UnsignedIntValue() << std::endl;
}

#ifdef TRITON_PB_STUB
pb_cancel_ =
std::make_shared<PbCancel>(response_factory_address_, request_address_);
Expand Down
Loading

0 comments on commit 521874e

Please sign in to comment.