From e63594fef33969599004b21535b865b81496d601 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Mon, 10 Jun 2024 12:57:36 -0700 Subject: [PATCH 1/3] Prune non requested outputs from non-decoupled models --- src/infer_request.cc | 10 ++++++---- src/response_sender.cc | 34 +++++++++++++++++++++++++++------- src/response_sender.h | 8 +++++++- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 57ea6cf1..7890b489 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -68,14 +68,15 @@ InferRequest::InferRequest( } } - inputs_ = inputs; - requested_output_names_ = requested_output_names; + inputs_ = inputs; // TODO: do we need this? + requested_output_names_ = requested_output_names; // TODO: do we need this? #ifdef TRITON_PB_STUB pb_cancel_ = std::make_shared(response_factory_address_, request_address_); response_sender_ = std::make_shared( request_address_, response_factory_address_, nullptr /* is_decoupled */, - Stub::GetOrCreateInstance()->SharedMemory(), pb_cancel_); + RequestedOutputNames(), Stub::GetOrCreateInstance()->SharedMemory(), + pb_cancel_); #endif } @@ -390,7 +391,8 @@ InferRequest::InferRequest( std::make_shared(response_factory_address_, request_address_); response_sender_ = std::make_shared( request_address_, response_factory_address_, is_model_decoupled, - Stub::GetOrCreateInstance()->SharedMemory(), pb_cancel_); + RequestedOutputNames(), Stub::GetOrCreateInstance()->SharedMemory(), + pb_cancel_); #endif } diff --git a/src/response_sender.cc b/src/response_sender.cc index 74914ab4..b8ac4603 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -54,12 +54,15 @@ CheckResponseSenderArguments( ResponseSender::ResponseSender( intptr_t request_address, intptr_t response_factory_address, - bool const* is_decoupled, std::unique_ptr& shm_pool, + bool const* is_decoupled, + const std::set& requested_output_names, + std::unique_ptr& shm_pool, const std::shared_ptr& pb_cancel) : request_address_(request_address), response_factory_address_(response_factory_address), - is_decoupled_(is_decoupled), shm_pool_(shm_pool), pb_cancel_(pb_cancel), - closed_(false), number_of_response_sent_(0) + is_decoupled_(is_decoupled), + requested_output_names_(requested_output_names), shm_pool_(shm_pool), + pb_cancel_(pb_cancel), closed_(false), number_of_response_sent_(0) { } @@ -71,9 +74,8 @@ ResponseSender::~ResponseSender() PYTHONSTUB_DecoupledResponseFactoryCleanup); } -void -ResponseSender::UpdateStateAndCounters( - const std::shared_ptr& response, const uint32_t flags) +bool +ResponseSender::IsDecoupled() const { if (is_decoupled_ == nullptr) { // TODO: Can a model access the response sender on a BLS infer request? @@ -81,7 +83,14 @@ ResponseSender::UpdateStateAndCounters( "Unable to send response. Response sender has no reference to the " "decoupled state of the model."); } - bool is_decoupled = *is_decoupled_; + return *is_decoupled_; +} + +void +ResponseSender::UpdateStateAndCounters( + const std::shared_ptr& response, const uint32_t flags) +{ + bool is_decoupled = IsDecoupled(); std::lock_guard lk(mu_); @@ -110,6 +119,16 @@ ResponseSender::UpdateStateAndCounters( number_of_response_sent_++; } +void +ResponseSender::PruneNonRequestedOutputs( + const std::shared_ptr& infer_response) const +{ + // TODO: should this be limited to non decoupled only? + if (!IsDecoupled() && infer_response) { + infer_response->PruneOutputTensors(requested_output_names_); + } +} + void ResponseSender::Send( std::shared_ptr infer_response, const uint32_t flags) @@ -123,6 +142,7 @@ ResponseSender::Send( CheckResponseSenderArguments(infer_response, flags); UpdateStateAndCounters(infer_response, flags); + PruneNonRequestedOutputs(infer_response); std::unique_ptr& stub = Stub::GetOrCreateInstance(); diff --git a/src/response_sender.h b/src/response_sender.h index 1b57508e..05ad8069 100644 --- a/src/response_sender.h +++ b/src/response_sender.h @@ -38,7 +38,9 @@ class ResponseSender { public: ResponseSender( intptr_t request_address, intptr_t response_factory_address, - bool const* is_decoupled, std::unique_ptr& shm_pool, + bool const* is_decoupled, + const std::set& requested_output_names, + std::unique_ptr& shm_pool, const std::shared_ptr& pb_cancel); ~ResponseSender(); void Send(std::shared_ptr response, const uint32_t flags); @@ -48,12 +50,16 @@ class ResponseSender { void Close(); private: + bool IsDecoupled() const; void UpdateStateAndCounters( const std::shared_ptr& response, const uint32_t flags); + void PruneNonRequestedOutputs( + const std::shared_ptr& infer_response) const; intptr_t request_address_; intptr_t response_factory_address_; bool const* is_decoupled_; + std::set requested_output_names_; std::unique_ptr& shm_pool_; std::shared_ptr pb_cancel_; From 37c199ea911be7eda1e899328353fe569a91a752 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:53:44 -0700 Subject: [PATCH 2/3] Prune non requested outputs from decoupled models --- src/response_sender.cc | 28 +++++++--------------------- src/response_sender.h | 3 --- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/src/response_sender.cc b/src/response_sender.cc index b8ac4603..1831601f 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -74,8 +74,9 @@ ResponseSender::~ResponseSender() PYTHONSTUB_DecoupledResponseFactoryCleanup); } -bool -ResponseSender::IsDecoupled() const +void +ResponseSender::UpdateStateAndCounters( + const std::shared_ptr& response, const uint32_t flags) { if (is_decoupled_ == nullptr) { // TODO: Can a model access the response sender on a BLS infer request? @@ -83,14 +84,7 @@ ResponseSender::IsDecoupled() const "Unable to send response. Response sender has no reference to the " "decoupled state of the model."); } - return *is_decoupled_; -} - -void -ResponseSender::UpdateStateAndCounters( - const std::shared_ptr& response, const uint32_t flags) -{ - bool is_decoupled = IsDecoupled(); + bool is_decoupled = *is_decoupled_; std::lock_guard lk(mu_); @@ -119,16 +113,6 @@ ResponseSender::UpdateStateAndCounters( number_of_response_sent_++; } -void -ResponseSender::PruneNonRequestedOutputs( - const std::shared_ptr& infer_response) const -{ - // TODO: should this be limited to non decoupled only? - if (!IsDecoupled() && infer_response) { - infer_response->PruneOutputTensors(requested_output_names_); - } -} - void ResponseSender::Send( std::shared_ptr infer_response, const uint32_t flags) @@ -142,7 +126,9 @@ ResponseSender::Send( CheckResponseSenderArguments(infer_response, flags); UpdateStateAndCounters(infer_response, flags); - PruneNonRequestedOutputs(infer_response); + if (infer_response) { + infer_response->PruneOutputTensors(requested_output_names_); + } std::unique_ptr& stub = Stub::GetOrCreateInstance(); diff --git a/src/response_sender.h b/src/response_sender.h index 05ad8069..f274f5b4 100644 --- a/src/response_sender.h +++ b/src/response_sender.h @@ -50,11 +50,8 @@ class ResponseSender { void Close(); private: - bool IsDecoupled() const; void UpdateStateAndCounters( const std::shared_ptr& response, const uint32_t flags); - void PruneNonRequestedOutputs( - const std::shared_ptr& infer_response) const; intptr_t request_address_; intptr_t response_factory_address_; From b745ae54906a2b36c72a6f5a29b38411f99f06f9 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 11 Jun 2024 12:13:59 -0700 Subject: [PATCH 3/3] [chore] Remove redundant copy --- src/infer_request.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 7890b489..8a95b524 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -68,8 +68,6 @@ InferRequest::InferRequest( } } - inputs_ = inputs; // TODO: do we need this? - requested_output_names_ = requested_output_names; // TODO: do we need this? #ifdef TRITON_PB_STUB pb_cancel_ = std::make_shared(response_factory_address_, request_address_);