From 98a37fd78cbedbb386fad03697b5275140991769 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Thu, 30 Jan 2025 12:52:49 -0800
Subject: [PATCH 1/9] feat: Extend response parameters support to BLS in python backend

---
 .gitignore              |  2 ++
 README.md               |  5 ++--
 src/request_executor.cc | 52 +++++++++++++++++++++++++++++++++++++----
 3 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 293f6455..bafd2974 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,3 +138,5 @@ dmypy.json
 
 # pytype static type analyzer
 .pytype/
+# vscode
+.vscode/settings.json

diff --git a/README.md b/README.md
index a6242a44..e1140b26 100644
--- a/README.md
+++ b/README.md
@@ -803,8 +803,9 @@ You can read more about the inference response parameters in the
 [parameters extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_parameters.md)
 documentation.
 
-Inference response parameters is currently not supported on BLS inference
-responses received by BLS models.
+Inference response parameters are supported when using BLS as well, i.e. when
+using BLS to call another model A, you can access the optional parameters
+set by A in its response.
 
 ## Managing Python Runtime and Libraries
 
diff --git a/src/request_executor.cc b/src/request_executor.cc
index c197948d..189b67d9 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -84,6 +84,7 @@ InferResponseComplete(
   std::unique_ptr<InferResponse> infer_response;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
   std::shared_ptr<PbError> pb_error;
+  std::string parameters_string;
 
   if (response != nullptr) {
     try {
@@ -153,21 +154,64 @@ InferResponseComplete(
       output_tensors.clear();
     }
 
-    // TODO: [DLIS-7864] Pass response parameters from BLS response.
+    try {
+      triton::common::TritonJson::Value parameters_json(
+          triton::common::TritonJson::ValueType::OBJECT);
+      uint32_t parameter_count;
+      THROW_IF_TRITON_ERROR(
+          TRITONSERVER_InferenceResponseParameterCount(response, &parameter_count));
+      for (size_t i = 0; i < parameter_count; i++) {
+        const char* name;
+        TRITONSERVER_ParameterType type;
+        const void* vvalue;
+        THROW_IF_TRITON_ERROR(
+            TRITONSERVER_InferenceResponseParameter(response, i, &name, &type, &vvalue));
+        if (type == TRITONSERVER_PARAMETER_INT) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddInt(
+              name, *(reinterpret_cast<const int64_t*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_BOOL) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddBool(
+              name, *(reinterpret_cast<const bool*>(vvalue))));
+        } else if (type == TRITONSERVER_PARAMETER_STRING) {
+          std::string string = reinterpret_cast<const char*>(vvalue);
+          THROW_IF_TRITON_ERROR(parameters_json.AddString(name, string));
+        } else if (type == TRITONSERVER_PARAMETER_DOUBLE) {
+          THROW_IF_TRITON_ERROR(parameters_json.AddDouble(
+              name, *(reinterpret_cast<const double*>(vvalue))));
+        } else {
+          throw PythonBackendException((std::string("Unsupported parameter type for parameter '") + name + "'."))
+        }
+      }
+
+      triton::common::TritonJson::WriteBuffer buffer;
+      THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
+      parameters_string = buffer.Contents();
+    }
+    catch (const PythonBackendException& pb_exception) {
+      if (response != nullptr) {
+        LOG_IF_ERROR(
+            TRITONSERVER_InferenceResponseDelete(response),
+            "Failed to delete inference response.");
+
+        response = nullptr;
+      }
+      pb_error = std::make_shared<PbError>(pb_exception.what());
+    }
+
     if (!infer_payload->IsDecoupled()) {
       infer_response = std::make_unique<InferResponse>(
-          output_tensors, pb_error, "" /* parameters */,
+          output_tensors, pb_error, parameters_string,
           true /* is_last_response */);
     } else {
       if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) {
         // Not the last response.
         infer_response = std::make_unique<InferResponse>(
-            output_tensors, pb_error, "" /* parameters */,
+            output_tensors, pb_error, parameters_string,
             false /* is_last_response */, userp /* id */);
       } else {
         // The last response.
         infer_response = std::make_unique<InferResponse>(
-            output_tensors, pb_error, "" /* parameters */,
+            output_tensors, pb_error, parameters_string,
             true /* is_last_response */, userp /* id */);
       }
     }

From 84f6197469436012b395f05922460f6f81e05585 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Thu, 30 Jan 2025 12:52:49 -0800
Subject: [PATCH 2/9] feat: Extend response parameters support to BLS in python backend

---
 src/request_executor.cc | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index 189b67d9..f77a59fb 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -158,14 +158,14 @@ InferResponseComplete(
       triton::common::TritonJson::Value parameters_json(
           triton::common::TritonJson::ValueType::OBJECT);
       uint32_t parameter_count;
-      THROW_IF_TRITON_ERROR(
-          TRITONSERVER_InferenceResponseParameterCount(response, &parameter_count));
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
+          response, &parameter_count));
       for (size_t i = 0; i < parameter_count; i++) {
         const char* name;
         TRITONSERVER_ParameterType type;
         const void* vvalue;
-        THROW_IF_TRITON_ERROR(
-            TRITONSERVER_InferenceResponseParameter(response, i, &name, &type, &vvalue));
+        THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameter(
+            response, i, &name, &type, &vvalue));
         if (type == TRITONSERVER_PARAMETER_INT) {
           THROW_IF_TRITON_ERROR(parameters_json.AddInt(
               name, *(reinterpret_cast<const int64_t*>(vvalue))));
@@ -179,7 +179,9 @@ InferResponseComplete(
           THROW_IF_TRITON_ERROR(parameters_json.AddDouble(
               name, *(reinterpret_cast<const double*>(vvalue))));
         } else {
-          throw PythonBackendException((std::string("Unsupported parameter type for parameter '") + name + "'."))
+          throw PythonBackendException(
+              (std::string("Unsupported parameter type for parameter '") +
+               name + "'."))
         }
       }
 
@@ -197,7 +199,11 @@ InferResponseComplete(
       }
       pb_error = std::make_shared<PbError>(pb_exception.what());
     }
+<<<<<<< HEAD
 
+=======
+
+>>>>>>> 408c23b (feat: Extend response parameters support to BLS in python backend)
     if (!infer_payload->IsDecoupled()) {
       infer_response = std::make_unique<InferResponse>(
           output_tensors, pb_error, parameters_string,

From 0c7c572a937340fc8fb21efd023bf9a371f479e8 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Thu, 30 Jan 2025 13:02:29 -0800
Subject: [PATCH 3/9] clean up

---
 src/request_executor.cc | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index f77a59fb..76d4ad63 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -199,11 +199,7 @@ InferResponseComplete(
       }
       pb_error = std::make_shared<PbError>(pb_exception.what());
     }
-<<<<<<< HEAD
-
-=======
 
->>>>>>> 408c23b (feat: Extend response parameters support to BLS in python backend)
     if (!infer_payload->IsDecoupled()) {
       infer_response = std::make_unique<InferResponse>(
           output_tensors, pb_error, parameters_string,

From 99d74a5c2f5d0918085b0a604b5ca174652eaa45 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Thu, 30 Jan 2025 16:09:58 -0800
Subject: [PATCH 4/9] fix

---
 src/request_executor.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index 76d4ad63..a035fa60 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -181,7 +181,7 @@ InferResponseComplete(
         } else {
           throw PythonBackendException(
               (std::string("Unsupported parameter type for parameter '") +
-               name + "'."))
+               name + "'."));
         }
       }
 

From c6a9930f98161376c952f7454ffb74ba31362e99 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Mon, 3 Feb 2025 14:51:33 -0800
Subject: [PATCH 5/9] add cerr

---
 src/request_executor.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index a035fa60..88fdafa9 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -157,6 +157,7 @@ InferResponseComplete(
     try {
       triton::common::TritonJson::Value parameters_json(
           triton::common::TritonJson::ValueType::OBJECT);
+      std::cerr << "debug ziqif: response = " << response << std::endl;
       uint32_t parameter_count;
       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
           response, &parameter_count));

From 3a2182f338885ac3f41493b99b10882ecead4b34 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Mon, 3 Feb 2025 15:18:45 -0800
Subject: [PATCH 6/9] move code

---
 src/request_executor.cc | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index 88fdafa9..a3aff0f2 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -141,23 +141,9 @@ InferResponseComplete(
           output_tensors.push_back(pb_tensor);
         }
       }
-    }
-    catch (const PythonBackendException& pb_exception) {
-      if (response != nullptr) {
-        LOG_IF_ERROR(
-            TRITONSERVER_InferenceResponseDelete(response),
-            "Failed to delete inference response.");
-        response = nullptr;
-      }
-      pb_error = std::make_shared<PbError>(pb_exception.what());
-      output_tensors.clear();
-    }
 
-    try {
       triton::common::TritonJson::Value parameters_json(
           triton::common::TritonJson::ValueType::OBJECT);
-      std::cerr << "debug ziqif: response = " << response << std::endl;
       uint32_t parameter_count;
       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
           response, &parameter_count));
@@ -185,10 +171,6 @@ InferResponseComplete(
               name + "'."));
         }
       }
-
-      triton::common::TritonJson::WriteBuffer buffer;
-      THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
-      parameters_string = buffer.Contents();
     }
     catch (const PythonBackendException& pb_exception) {
       if (response != nullptr) {
@@ -199,6 +181,7 @@ InferResponseComplete(
         response = nullptr;
       }
       pb_error = std::make_shared<PbError>(pb_exception.what());
+      output_tensors.clear();
     }
 
     if (!infer_payload->IsDecoupled()) {

From 93372720386672cbdcc63ffddadae10c1b700873 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Mon, 3 Feb 2025 15:19:46 -0800
Subject: [PATCH 7/9] add missing code

---
 src/request_executor.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index a3aff0f2..a97bbfd4 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -171,6 +171,9 @@ InferResponseComplete(
               name + "'."));
         }
       }
+      triton::common::TritonJson::WriteBuffer buffer;
+      THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
+      parameters_string = buffer.Contents();
     }
     catch (const PythonBackendException& pb_exception) {
       if (response != nullptr) {

From 13b07fafa7fd6d30ffa37aa0db1d2c99a65d7798 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Tue, 4 Feb 2025 12:26:53 -0800
Subject: [PATCH 8/9] remove double

---
 src/request_executor.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/request_executor.cc b/src/request_executor.cc
index a97bbfd4..3c51e626 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -147,6 +147,7 @@ InferResponseComplete(
       uint32_t parameter_count;
       THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceResponseParameterCount(
           response, &parameter_count));
+
       for (size_t i = 0; i < parameter_count; i++) {
         const char* name;
         TRITONSERVER_ParameterType type;
@@ -162,15 +163,13 @@ InferResponseComplete(
         } else if (type == TRITONSERVER_PARAMETER_STRING) {
           std::string string = reinterpret_cast<const char*>(vvalue);
           THROW_IF_TRITON_ERROR(parameters_json.AddString(name, string));
-        } else if (type == TRITONSERVER_PARAMETER_DOUBLE) {
-          THROW_IF_TRITON_ERROR(parameters_json.AddDouble(
-              name, *(reinterpret_cast<const double*>(vvalue))));
         } else {
           throw PythonBackendException(
               (std::string("Unsupported parameter type for parameter '") +
                name + "'."));
         }
       }
+
       triton::common::TritonJson::WriteBuffer buffer;
       THROW_IF_TRITON_ERROR(parameters_json.Write(&buffer));
       parameters_string = buffer.Contents();

From 8eef149a80260f16f96c00065f6918cb77203e11 Mon Sep 17 00:00:00 2001
From: Ziqi Fan
Date: Tue, 4 Feb 2025 16:49:05 -0800
Subject: [PATCH 9/9] Update README.md

Co-authored-by: Jacky <18255193+kthui@users.noreply.github.com>
---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index e1140b26..b00dc0bf 100644
--- a/README.md
+++ b/README.md
@@ -803,9 +803,11 @@ You can read more about the inference response parameters in the
 [parameters extension](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_parameters.md)
 documentation.
 
-Inference response parameters are supported when using BLS as well, i.e. when
-using BLS to call another model A, you can access the optional parameters
-set by A in its response.
+The parameters associated with an inference response can be retrieved using the
+`inference_response.parameters()` function. This function returns a JSON string
+where the keys are the keys of the parameters object and the values are the
+values for the parameters field. Note that you need to parse this string using
+`json.loads` to convert it to a dictionary.
 
 ## Managing Python Runtime and Libraries
 
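With the series applied, a BLS caller can read the parameters a composing model
attaches to its response. Below is a minimal sketch of such a caller, not part
of the patch series: the model name `model_a` and output name `OUTPUT0` are
hypothetical placeholders, while `pb_utils.InferenceRequest`, `exec()`, and
`inference_response.parameters()` are the Python backend APIs referenced in the
README change above.

```python
# Minimal sketch of a BLS model reading response parameters after this
# series. "model_a" and "OUTPUT0" are hypothetical placeholders.
import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Call the composing model via BLS, forwarding this request's inputs.
            infer_request = pb_utils.InferenceRequest(
                model_name="model_a",
                requested_output_names=["OUTPUT0"],
                inputs=request.inputs(),
            )
            infer_response = infer_request.exec()
            if infer_response.has_error():
                raise pb_utils.TritonModelException(
                    infer_response.error().message())

            # parameters() returns a JSON string, so parse it with json.loads;
            # the "or" guard covers responses that set no parameters at all.
            params = json.loads(infer_response.parameters() or "{}")
            pb_utils.Logger.log_info(f"BLS response parameters: {params}")

            responses.append(pb_utils.InferenceResponse(
                output_tensors=infer_response.output_tensors()))
        return responses
```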