From d0bb0a090c863c94d1842572f79207ad54e4dccb Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Wed, 24 Jan 2024 22:56:07 +0530
Subject: [PATCH 1/8] Change correlation_id type from uint64_t to std::string

---
 src/infer_request.cc    | 4 ++--
 src/infer_request.h     | 8 ++++----
 src/pb_stub.cc          | 4 ++--
 src/python_be.cc        | 4 ++--
 src/request_executor.cc | 2 +-
 5 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/infer_request.cc b/src/infer_request.cc
index f18900d0..c0b17276 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -38,7 +38,7 @@
 namespace triton { namespace backend { namespace python {
 
 InferRequest::InferRequest(
-    const std::string& request_id, uint64_t correlation_id,
+    const std::string& request_id, const std::string& correlation_id,
     const std::vector<std::shared_ptr<PbTensor>>& inputs,
     const std::set<std::string>& requested_output_names,
     const std::string& model_name, const int64_t model_version,
@@ -97,7 +97,7 @@ InferRequest::RequestId()
   return request_id_;
 }
 
-uint64_t
+const std::string&
 InferRequest::CorrelationId()
 {
   return correlation_id_;
diff --git a/src/infer_request.h b/src/infer_request.h
index b8dee87c..b56beacc 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -62,7 +62,7 @@ struct InferenceTrace {
 // Inference Request
 //
 struct InferRequestShm {
-  uint64_t correlation_id;
+  std::string correlation_id;
   uint32_t input_count;
   uint32_t requested_output_count;
   int64_t model_version;
@@ -79,7 +79,7 @@ struct InferRequestShm {
 class InferRequest {
  public:
   InferRequest(
-      const std::string& request_id, uint64_t correlation_id,
+      const std::string& request_id, const std::string& correlation_id,
       const std::vector<std::shared_ptr<PbTensor>>& inputs,
       const std::set<std::string>& requested_output_names,
       const std::string& model_name, const int64_t model_version,
@@ -93,7 +93,7 @@ class InferRequest {
   const std::vector<std::shared_ptr<PbTensor>>& Inputs();
   const std::string& RequestId();
   const std::string& Parameters();
-  uint64_t CorrelationId();
+  const std::string& CorrelationId();
   const std::string& ModelName();
   int64_t ModelVersion();
   uint32_t Flags();
@@ -147,7 +147,7 @@ class InferRequest {
       std::unique_ptr<PbString>& parameters_shm);
 
   std::string request_id_;
-  uint64_t correlation_id_;
+  std::string correlation_id_;
   std::vector<std::shared_ptr<PbTensor>> inputs_;
   std::set<std::string> requested_output_names_;
   std::string model_name_;
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index a7d39852..e6727c93 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -1590,7 +1590,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
   py::class_<InferRequest, std::shared_ptr<InferRequest>>(
       module, "InferenceRequest")
       .def(
-          py::init([](const std::string& request_id, uint64_t correlation_id,
+          py::init([](const std::string& request_id, std::string correlation_id,
                       const std::vector<std::shared_ptr<PbTensor>>& inputs,
                       const std::vector<std::string>& requested_output_names,
                       const std::string& model_name,
@@ -1630,7 +1630,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
                 preferred_memory, trace);
           }),
           py::arg("request_id").none(false) = "",
-          py::arg("correlation_id").none(false) = 0,
+          py::arg("correlation_id").none(false) = "",
           py::arg("inputs").none(false),
           py::arg("requested_output_names").none(false),
           py::arg("model_name").none(false),
diff --git a/src/python_be.cc b/src/python_be.cc
index a8dfab07..e1ea8059 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -357,9 +357,9 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     const char* id;
     RETURN_IF_ERROR(TRITONBACKEND_RequestId(request, &id));
 
-    uint64_t correlation_id;
+    std::string correlation_id;
     RETURN_IF_ERROR(
-        TRITONBACKEND_RequestCorrelationId(request, &correlation_id));
+        TRITONBACKEND_RequestCorrelationIdString(request, &correlation_id));
 
     uint32_t flags;
     RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags));
diff --git a/src/request_executor.cc b/src/request_executor.cc
index 65f53710..931f08fd 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -354,7 +354,7 @@ RequestExecutor::Infer(
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetId(
         irequest, infer_request->RequestId().c_str()));
 
-    THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetCorrelationId(
+    THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetCorrelationIdString(
         irequest, infer_request->CorrelationId()));
 
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetFlags(

From 89cfec442f1d1e5ebcdc19d993c29752683baa20 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Wed, 24 Jan 2024 23:38:43 +0530
Subject: [PATCH 2/8] Fix compilation errors

---
 src/python_be.cc        | 2 +-
 src/request_executor.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python_be.cc b/src/python_be.cc
index e1ea8059..5734280a 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -357,7 +357,7 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     const char* id;
     RETURN_IF_ERROR(TRITONBACKEND_RequestId(request, &id));
 
-    std::string correlation_id;
+    const char* correlation_id;
     RETURN_IF_ERROR(
         TRITONBACKEND_RequestCorrelationIdString(request, &correlation_id));
 
diff --git a/src/request_executor.cc b/src/request_executor.cc
index 931f08fd..473f23fc 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -355,7 +355,7 @@ RequestExecutor::Infer(
         irequest, infer_request->RequestId().c_str()));
 
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetCorrelationIdString(
-        irequest, infer_request->CorrelationId()));
+        irequest, infer_request->CorrelationId().c_str()));
 
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetFlags(
         irequest, infer_request->Flags()));

From f2bce833e62331370e110207f3a2922bd26c0c8c Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Thu, 25 Jan 2024 12:25:51 +0530
Subject: [PATCH 3/8] Store correlation ID in shared memory as a PbString

---
 src/infer_request.cc | 30 +++++++++++++++++++++++-------
 src/infer_request.h  |  3 ++-
 src/pb_stub.cc       |  3 ++-
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/infer_request.cc b/src/infer_request.cc
index c0b17276..f9b3f9bf 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -199,11 +199,11 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)) +
       PbString::ShmStructSize(ModelName()) +
       PbString::ShmStructSize(RequestId()) +
+      PbString::ShmStructSize(CorrelationId()) +
       PbString::ShmStructSize(Parameters()));
 
   infer_request_shm_ptr_ =
       reinterpret_cast<InferRequestShm*>(infer_request_shm.data_.get());
-  infer_request_shm_ptr_->correlation_id = CorrelationId();
   infer_request_shm_ptr_->input_count = Inputs().size();
   infer_request_shm_ptr_->model_version = model_version_;
   infer_request_shm_ptr_->requested_output_count =
@@ -264,8 +264,15 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       reinterpret_cast<char*>(infer_request_shm_ptr_) + request_id_offset,
       infer_request_shm.handle_ + request_id_offset);
 
-  size_t parameters_offset =
+  size_t correlation_id_offset =
       request_id_offset + PbString::ShmStructSize(RequestId());
+  std::unique_ptr<PbString> correlation_id_shm = PbString::Create(
+      CorrelationId(),
+      reinterpret_cast<char*>(infer_request_shm_ptr_) + correlation_id_offset,
+      infer_request_shm.handle_ + correlation_id_offset);
+
+  size_t parameters_offset =
+      correlation_id_offset + PbString::ShmStructSize(CorrelationId());
   std::unique_ptr<PbString> parameters_shm = PbString::Create(
       Parameters(),
       reinterpret_cast<char*>(infer_request_shm_ptr_) + parameters_offset,
@@ -274,6 +281,7 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
   // Save the references to shared memory.
   infer_request_shm_ = std::move(infer_request_shm);
   request_id_shm_ = std::move(request_id_shm);
+  correlation_id_shm_ = std::move(correlation_id_shm);
   model_name_shm_ = std::move(model_name_shm);
   parameters_shm_ = std::move(parameters_shm);
   shm_handle_ = infer_request_shm_.handle_;
@@ -336,25 +344,33 @@ InferRequest::LoadFromSharedMemory(
       request_handle + request_id_offset,
       reinterpret_cast<char*>(infer_request_shm_ptr) + request_id_offset);
 
-  size_t parameters_offset = request_id_offset + request_id_shm->Size();
-  std::unique_ptr<PbString> parameters_shm = PbString::LoadFromSharedMemory(
+  size_t correlation_id_offset = request_id_offset + request_id_shm->Size();
+  std::unique_ptr<PbString> correlation_id_shm = PbString::LoadFromSharedMemory(
       request_handle + request_id_offset,
+      reinterpret_cast<char*>(infer_request_shm_ptr) + correlation_id_offset);
+
+  size_t parameters_offset = correlation_id_offset + correlation_id_shm->Size();
+  std::unique_ptr<PbString> parameters_shm = PbString::LoadFromSharedMemory(
+      request_handle + correlation_id_offset,
       reinterpret_cast<char*>(infer_request_shm_ptr) + parameters_offset);
 
   return std::unique_ptr<InferRequest>(new InferRequest(
-      infer_request_shm, request_id_shm, requested_output_names_shm,
-      model_name_shm, input_tensors, parameters_shm));
+      infer_request_shm, request_id_shm, correlation_id_shm,
+      requested_output_names_shm, model_name_shm, input_tensors,
+      parameters_shm));
 }
 
 InferRequest::InferRequest(
     AllocatedSharedMemory<char>& infer_request_shm,
     std::unique_ptr<PbString>& request_id_shm,
+    std::unique_ptr<PbString>& correlation_id_shm,
     std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
     std::unique_ptr<PbString>& model_name_shm,
     std::vector<std::shared_ptr<PbTensor>>& input_tensors,
     std::unique_ptr<PbString>& parameters_shm)
     : infer_request_shm_(std::move(infer_request_shm)),
       request_id_shm_(std::move(request_id_shm)),
+      correlation_id_shm_(std::move(correlation_id_shm)),
       requested_output_names_shm_(std::move(requested_output_names_shm)),
       model_name_shm_(std::move(model_name_shm)),
       parameters_shm_(std::move(parameters_shm))
@@ -382,12 +398,12 @@ InferRequest::InferRequest(
   }
 
   request_id_ = request_id_shm_->String();
+  correlation_id_ = correlation_id_shm_->String();
   parameters_ = parameters_shm_->String();
   requested_output_names_ = std::move(requested_output_names);
   model_name_ = model_name_shm_->String();
   flags_ = infer_request_shm_ptr_->flags;
   model_version_ = infer_request_shm_ptr_->model_version;
-  correlation_id_ = infer_request_shm_ptr_->correlation_id;
   request_address_ = infer_request_shm_ptr_->address;
   response_factory_address_ = infer_request_shm_ptr_->response_factory_address;
   is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
diff --git a/src/infer_request.h b/src/infer_request.h
index b56beacc..e2e50928 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -62,7 +62,6 @@ struct InferenceTrace {
 // Inference Request
 //
 struct InferRequestShm {
-  std::string correlation_id;
   uint32_t input_count;
   uint32_t requested_output_count;
   int64_t model_version;
@@ -141,6 +140,7 @@ class InferRequest {
   InferRequest(
       AllocatedSharedMemory<char>& infer_request_shm,
       std::unique_ptr<PbString>& request_id_shm,
+      std::unique_ptr<PbString>& correlation_id_shm,
       std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
       std::unique_ptr<PbString>& model_name_shm,
       std::vector<std::shared_ptr<PbTensor>>& input_tensors,
@@ -167,6 +167,7 @@ class InferRequest {
   InferRequestShm* infer_request_shm_ptr_;
 
   std::unique_ptr<PbString> request_id_shm_;
+  std::unique_ptr<PbString> correlation_id_shm_;
   std::vector<std::unique_ptr<PbString>> requested_output_names_shm_;
   std::unique_ptr<PbString> model_name_shm_;
   bi::managed_external_buffer::handle_t* output_names_handle_shm_ptr_;
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index e6727c93..8886ec3c 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -1590,7 +1590,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
   py::class_<InferRequest, std::shared_ptr<InferRequest>>(
       module, "InferenceRequest")
       .def(
-          py::init([](const std::string& request_id, std::string correlation_id,
+          py::init([](const std::string& request_id,
+                      const std::string& correlation_id,
                       const std::vector<std::shared_ptr<PbTensor>>& inputs,
                       const std::vector<std::string>& requested_output_names,
                       const std::string& model_name,

From c8e820379aead1da547e9e79f801665cf77df5c4 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Wed, 31 Jan 2024 16:30:03 +0530
Subject: [PATCH 4/8] Support Int and String Correlation ID

---
 src/infer_request.cc    | 61 +++++++++++++++++++++++++----------------
 src/infer_request.h     | 14 ++++++----
 src/pb_stub.cc          | 26 ++++++++++++++----
 src/python_be.cc        | 26 +++++++++++-------
 src/request_executor.cc |  9 ++++--
 5 files changed, 90 insertions(+), 46 deletions(-)

diff --git a/src/infer_request.cc b/src/infer_request.cc
index f9b3f9bf..38f90616 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -38,14 +38,16 @@
 namespace triton { namespace backend { namespace python {
 
 InferRequest::InferRequest(
-    const std::string& request_id, const std::string& correlation_id,
+    const std::string& request_id, uint64_t correlation_id,
+    const std::string& correlation_id_string,
     const std::vector<std::shared_ptr<PbTensor>>& inputs,
     const std::set<std::string>& requested_output_names,
     const std::string& model_name, const int64_t model_version,
     const std::string& parameters, const uint32_t flags, const uint64_t timeout,
     const intptr_t response_factory_address, const intptr_t request_address,
     const PreferredMemory& preferred_memory, const InferenceTrace& trace)
-    : request_id_(request_id), correlation_id_(correlation_id), inputs_(inputs),
+    : request_id_(request_id), correlation_id_(correlation_id),
+      correlation_id_string_(correlation_id_string), inputs_(inputs),
       requested_output_names_(requested_output_names), model_name_(model_name),
       model_version_(model_version), parameters_(parameters), flags_(flags),
       timeout_(timeout), response_factory_address_(response_factory_address),
@@ -97,12 +99,18 @@ InferRequest::RequestId()
   return request_id_;
 }
 
-const std::string&
+uint64_t
 InferRequest::CorrelationId()
 {
   return correlation_id_;
 }
 
+const std::string&
+InferRequest::CorrelationIdString()
+{
+  return correlation_id_string_;
+}
+
 const std::set<std::string>&
 InferRequest::RequestedOutputNames()
 {
@@ -199,11 +207,12 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       (Inputs().size() * sizeof(bi::managed_external_buffer::handle_t)) +
       PbString::ShmStructSize(ModelName()) +
       PbString::ShmStructSize(RequestId()) +
-      PbString::ShmStructSize(CorrelationId()) +
+      PbString::ShmStructSize(CorrelationIdString()) +
       PbString::ShmStructSize(Parameters()));
 
   infer_request_shm_ptr_ =
       reinterpret_cast<InferRequestShm*>(infer_request_shm.data_.get());
+  infer_request_shm_ptr_->correlation_id = CorrelationId();
   infer_request_shm_ptr_->input_count = Inputs().size();
   infer_request_shm_ptr_->model_version = model_version_;
   infer_request_shm_ptr_->requested_output_count =
@@ -264,15 +273,16 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
       reinterpret_cast<char*>(infer_request_shm_ptr_) + request_id_offset,
       infer_request_shm.handle_ + request_id_offset);
 
-  size_t correlation_id_offset =
+  size_t correlation_id_string_offset =
       request_id_offset + PbString::ShmStructSize(RequestId());
-  std::unique_ptr<PbString> correlation_id_shm = PbString::Create(
-      CorrelationId(),
-      reinterpret_cast<char*>(infer_request_shm_ptr_) + correlation_id_offset,
-      infer_request_shm.handle_ + correlation_id_offset);
-
-  size_t parameters_offset =
-      correlation_id_offset + PbString::ShmStructSize(CorrelationId());
+  std::unique_ptr<PbString> correlation_id_string_shm = PbString::Create(
+      CorrelationIdString(),
+      reinterpret_cast<char*>(infer_request_shm_ptr_) +
+          correlation_id_string_offset,
+      infer_request_shm.handle_ + correlation_id_string_offset);
+
+  size_t parameters_offset = correlation_id_string_offset +
+                             PbString::ShmStructSize(CorrelationIdString());
   std::unique_ptr<PbString> parameters_shm = PbString::Create(
       Parameters(),
       reinterpret_cast<char*>(infer_request_shm_ptr_) + parameters_offset,
@@ -281,7 +291,7 @@ InferRequest::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
   // Save the references to shared memory.
   infer_request_shm_ = std::move(infer_request_shm);
   request_id_shm_ = std::move(request_id_shm);
-  correlation_id_shm_ = std::move(correlation_id_shm);
+  correlation_id_string_shm_ = std::move(correlation_id_string_shm);
   model_name_shm_ = std::move(model_name_shm);
   parameters_shm_ = std::move(parameters_shm);
   shm_handle_ = infer_request_shm_.handle_;
@@ -344,18 +354,22 @@ InferRequest::LoadFromSharedMemory(
       request_handle + request_id_offset,
       reinterpret_cast<char*>(infer_request_shm_ptr) + request_id_offset);
 
-  size_t correlation_id_offset = request_id_offset + request_id_shm->Size();
-  std::unique_ptr<PbString> correlation_id_shm = PbString::LoadFromSharedMemory(
-      request_handle + request_id_offset,
-      reinterpret_cast<char*>(infer_request_shm_ptr) + correlation_id_offset);
+  size_t correlation_id_string_offset =
+      request_id_offset + request_id_shm->Size();
+  std::unique_ptr<PbString> correlation_id_string_shm =
+      PbString::LoadFromSharedMemory(
+          request_handle + request_id_offset,
+          reinterpret_cast<char*>(infer_request_shm_ptr) +
+              correlation_id_string_offset);
 
-  size_t parameters_offset = correlation_id_offset + correlation_id_shm->Size();
+  size_t parameters_offset =
+      correlation_id_string_offset + correlation_id_string_shm->Size();
   std::unique_ptr<PbString> parameters_shm = PbString::LoadFromSharedMemory(
-      request_handle + correlation_id_offset,
+      request_handle + correlation_id_string_offset,
       reinterpret_cast<char*>(infer_request_shm_ptr) + parameters_offset);
 
   return std::unique_ptr<InferRequest>(new InferRequest(
-      infer_request_shm, request_id_shm, correlation_id_shm,
+      infer_request_shm, request_id_shm, correlation_id_string_shm,
       requested_output_names_shm, model_name_shm, input_tensors,
       parameters_shm));
 }
@@ -363,14 +377,14 @@ InferRequest::LoadFromSharedMemory(
 InferRequest::InferRequest(
     AllocatedSharedMemory<char>& infer_request_shm,
     std::unique_ptr<PbString>& request_id_shm,
-    std::unique_ptr<PbString>& correlation_id_shm,
+    std::unique_ptr<PbString>& correlation_id_string_shm,
     std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
     std::unique_ptr<PbString>& model_name_shm,
     std::vector<std::shared_ptr<PbTensor>>& input_tensors,
     std::unique_ptr<PbString>& parameters_shm)
     : infer_request_shm_(std::move(infer_request_shm)),
       request_id_shm_(std::move(request_id_shm)),
-      correlation_id_shm_(std::move(correlation_id_shm)),
+      correlation_id_string_shm_(std::move(correlation_id_string_shm)),
       requested_output_names_shm_(std::move(requested_output_names_shm)),
       model_name_shm_(std::move(model_name_shm)),
       parameters_shm_(std::move(parameters_shm))
@@ -398,12 +412,13 @@ InferRequest::InferRequest(
   }
 
   request_id_ = request_id_shm_->String();
-  correlation_id_ = correlation_id_shm_->String();
+  correlation_id_string_ = correlation_id_string_shm_->String();
   parameters_ = parameters_shm_->String();
   requested_output_names_ = std::move(requested_output_names);
   model_name_ = model_name_shm_->String();
   flags_ = infer_request_shm_ptr_->flags;
   model_version_ = infer_request_shm_ptr_->model_version;
+  correlation_id_ = infer_request_shm_ptr_->correlation_id;
   request_address_ = infer_request_shm_ptr_->address;
   response_factory_address_ = infer_request_shm_ptr_->response_factory_address;
   is_decoupled_ = infer_request_shm_ptr_->is_decoupled;
diff --git a/src/infer_request.h b/src/infer_request.h
index e2e50928..b72e3aef 100644
--- a/src/infer_request.h
+++ b/src/infer_request.h
@@ -62,6 +62,7 @@ struct InferenceTrace {
 // Inference Request
 //
 struct InferRequestShm {
+  uint64_t correlation_id;
   uint32_t input_count;
   uint32_t requested_output_count;
   int64_t model_version;
@@ -78,7 +79,8 @@ struct InferRequestShm {
 class InferRequest {
  public:
   InferRequest(
-      const std::string& request_id, const std::string& correlation_id,
+      const std::string& request_id, uint64_t correlation_id,
+      const std::string& correlation_id_string,
       const std::vector<std::shared_ptr<PbTensor>>& inputs,
       const std::set<std::string>& requested_output_names,
       const std::string& model_name, const int64_t model_version,
@@ -92,7 +94,8 @@ class InferRequest {
   const std::vector<std::shared_ptr<PbTensor>>& Inputs();
   const std::string& RequestId();
   const std::string& Parameters();
-  const std::string& CorrelationId();
+  uint64_t CorrelationId();
+  const std::string& CorrelationIdString();
   const std::string& ModelName();
   int64_t ModelVersion();
   uint32_t Flags();
@@ -140,14 +143,15 @@ class InferRequest {
   InferRequest(
       AllocatedSharedMemory<char>& infer_request_shm,
       std::unique_ptr<PbString>& request_id_shm,
-      std::unique_ptr<PbString>& correlation_id_shm,
+      std::unique_ptr<PbString>& correlation_id_string_shm,
       std::vector<std::unique_ptr<PbString>>& requested_output_names_shm,
       std::unique_ptr<PbString>& model_name_shm,
       std::vector<std::shared_ptr<PbTensor>>& input_tensors,
       std::unique_ptr<PbString>& parameters_shm);
 
   std::string request_id_;
-  std::string correlation_id_;
+  uint64_t correlation_id_;
+  std::string correlation_id_string_;
   std::vector<std::shared_ptr<PbTensor>> inputs_;
   std::set<std::string> requested_output_names_;
   std::string model_name_;
@@ -167,7 +171,7 @@ class InferRequest {
   InferRequestShm* infer_request_shm_ptr_;
 
   std::unique_ptr<PbString> request_id_shm_;
-  std::unique_ptr<PbString> correlation_id_shm_;
+  std::unique_ptr<PbString> correlation_id_string_shm_;
   std::vector<std::unique_ptr<PbString>> requested_output_names_shm_;
   std::unique_ptr<PbString> model_name_shm_;
   bi::managed_external_buffer::handle_t* output_names_handle_shm_ptr_;
diff --git a/src/pb_stub.cc b/src/pb_stub.cc
index 8886ec3c..f692ae13 100644
--- a/src/pb_stub.cc
+++ b/src/pb_stub.cc
@@ -1591,7 +1591,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
       module, "InferenceRequest")
       .def(
           py::init([](const std::string& request_id,
-                      const std::string& correlation_id,
+                      const py::object& correlation_id,
                       const std::vector<std::shared_ptr<PbTensor>>& inputs,
                       const std::vector<std::string>& requested_output_names,
                       const std::string& model_name,
@@ -1624,14 +1624,27 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
             py::module_ py_json = py::module_::import("json");
             std::string parameters_str =
                 py::str(py_json.attr("dumps")(parameters));
+
+            uint64_t correlation_id_int = 0;
+            std::string correlation_id_str = "";
+
+            if (py::isinstance<py::int_>(correlation_id)) {
+              correlation_id_int = py::cast<uint64_t>(correlation_id);
+            } else if (py::isinstance<py::str>(correlation_id)) {
+              correlation_id_str = py::cast<std::string>(correlation_id);
+            } else {
+              throw PythonBackendException(
+                  "Correlation ID must be integer or string");
+            }
+
             return std::make_shared<InferRequest>(
-                request_id, correlation_id, inputs, requested_outputs,
-                model_name, model_version, parameters_str, flags, timeout,
-                0 /*response_factory_address*/, 0 /*request_address*/,
-                preferred_memory, trace);
+                request_id, correlation_id_int, correlation_id_str, inputs,
+                requested_outputs, model_name, model_version, parameters_str,
+                flags, timeout, 0 /*response_factory_address*/,
+                0 /*request_address*/, preferred_memory, trace);
           }),
           py::arg("request_id").none(false) = "",
-          py::arg("correlation_id").none(false) = "",
+          py::arg("correlation_id").none(false) = 0,
           py::arg("inputs").none(false),
           py::arg("requested_output_names").none(false),
           py::arg("model_name").none(false),
@@ -1646,6 +1659,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module)
           py::return_value_policy::reference_internal)
       .def("request_id", &InferRequest::RequestId)
       .def("correlation_id", &InferRequest::CorrelationId)
+      .def("correlation_id_string", &InferRequest::CorrelationIdString)
       .def("flags", &InferRequest::Flags)
       .def("set_flags", &InferRequest::SetFlags)
       .def("timeout", &InferRequest::Timeout)
diff --git a/src/python_be.cc b/src/python_be.cc
index 5734280a..c76df4f9 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -357,9 +357,14 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     const char* id;
     RETURN_IF_ERROR(TRITONBACKEND_RequestId(request, &id));
 
-    const char* correlation_id;
-    RETURN_IF_ERROR(
-        TRITONBACKEND_RequestCorrelationIdString(request, &correlation_id));
+    uint64_t correlation_id;
+    const char* correlation_id_string;
+
+    auto error = TRITONBACKEND_RequestCorrelationId(request, &correlation_id);
+    if (error != nullptr) {
+      RETURN_IF_ERROR(TRITONBACKEND_RequestCorrelationIdString(
+          request, &correlation_id_string));
+    }
 
     uint32_t flags;
     RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags));
@@ -382,17 +387,18 @@ ModelInstanceState::SaveRequestsToSharedMemory(
       RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));
 
       infer_request = std::make_unique<InferRequest>(
-          id, correlation_id, pb_input_tensors, requested_output_names,
-          model_state->Name(), model_state->Version(), parameters_string, flags,
-          request_timeout, reinterpret_cast<intptr_t>(factory_ptr),
+          id, correlation_id, correlation_id_string, pb_input_tensors,
+          requested_output_names, model_state->Name(), model_state->Version(),
+          parameters_string, flags, request_timeout,
+          reinterpret_cast<intptr_t>(factory_ptr),
           reinterpret_cast<intptr_t>(request),
           PreferredMemory(PreferredMemory::DEFAULT, 0), trace);
     } else {
       infer_request = std::make_unique<InferRequest>(
-          id, correlation_id, pb_input_tensors, requested_output_names,
-          model_state->Name(), model_state->Version(), parameters_string, flags,
-          request_timeout, 0 /* response_factory_address */,
-          reinterpret_cast<intptr_t>(request),
+          id, correlation_id, correlation_id_string, pb_input_tensors,
+          requested_output_names, model_state->Name(), model_state->Version(),
+          parameters_string, flags, request_timeout,
+          0 /* response_factory_address */, reinterpret_cast<intptr_t>(request),
           PreferredMemory(PreferredMemory::DEFAULT, 0), trace);
     }
 
diff --git a/src/request_executor.cc b/src/request_executor.cc
index 473f23fc..a462176e 100644
--- a/src/request_executor.cc
+++ b/src/request_executor.cc
@@ -354,8 +354,13 @@ RequestExecutor::Infer(
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetId(
         irequest, infer_request->RequestId().c_str()));
 
-    THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetCorrelationIdString(
-        irequest, infer_request->CorrelationId().c_str()));
+    if (infer_request->CorrelationIdString().empty()) {
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetCorrelationId(
+          irequest, infer_request->CorrelationId()));
+    } else {
+      THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetCorrelationIdString(
+          irequest, infer_request->CorrelationIdString().c_str()));
+    }
 
     THROW_IF_TRITON_ERROR(TRITONSERVER_InferenceRequestSetFlags(
         irequest, infer_request->Flags()));

From 3c17dd09c4c207de83022fe9ff600a0de2ce23e2 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Thu, 1 Feb 2024 01:18:50 +0530
Subject: [PATCH 5/8] Fix shm handle offsets when loading correlation ID and parameters

---
 src/infer_request.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/infer_request.cc b/src/infer_request.cc
index 38f90616..7a2d58f1 100644
--- a/src/infer_request.cc
+++ b/src/infer_request.cc
@@ -358,14 +358,14 @@ InferRequest::LoadFromSharedMemory(
       request_id_offset + request_id_shm->Size();
   std::unique_ptr<PbString> correlation_id_string_shm =
       PbString::LoadFromSharedMemory(
-          request_handle + request_id_offset,
+          request_handle + correlation_id_string_offset,
           reinterpret_cast<char*>(infer_request_shm_ptr) +
               correlation_id_string_offset);
 
   size_t parameters_offset =
       correlation_id_string_offset + correlation_id_string_shm->Size();
   std::unique_ptr<PbString> parameters_shm = PbString::LoadFromSharedMemory(
-      request_handle + correlation_id_string_offset,
+      request_handle + parameters_offset,
       reinterpret_cast<char*>(infer_request_shm_ptr) + parameters_offset);
 
   return std::unique_ptr<InferRequest>(new InferRequest(

From 65936703403920bbd08f61229f55e5d9a9e9c23c Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Fri, 9 Feb 2024 15:40:01 +0530
Subject: [PATCH 6/8] Initialize correlation_id_string to an empty string

---
 src/python_be.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python_be.cc b/src/python_be.cc
index 74d7c364..1bf87ac7 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -363,7 +363,7 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     RETURN_IF_ERROR(TRITONBACKEND_RequestId(request, &id));
 
     uint64_t correlation_id;
-    const char* correlation_id_string;
+    const char* correlation_id_string = "";
 
     auto error = TRITONBACKEND_RequestCorrelationId(request, &correlation_id);
     if (error != nullptr) {

From 2b86e4b35e1201a3e7a639ae5ab0a8501331fb07 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Fri, 9 Feb 2024 15:44:54 +0530
Subject: [PATCH 7/8] Fix pre-commit error

---
 src/python_be.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/python_be.cc b/src/python_be.cc
index 1bf87ac7..d803fecb 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -403,10 +403,10 @@ ModelInstanceState::SaveRequestsToSharedMemory(
           PreferredMemory(PreferredMemory::kDefault, 0), trace);
     } else {
       infer_request = std::make_unique<InferRequest>(
-          id, correlation_id, correlation_id_string, pb_input_tensors, requested_output_names,
-          model_state->Name(), model_state->Version(), parameters_string, flags,
-          request_timeout, 0 /* response_factory_address */,
-          reinterpret_cast<intptr_t>(request),
+          id, correlation_id, correlation_id_string, pb_input_tensors,
+          requested_output_names, model_state->Name(), model_state->Version(),
+          parameters_string, flags, request_timeout,
+          0 /* response_factory_address */, reinterpret_cast<intptr_t>(request),
           PreferredMemory(PreferredMemory::kDefault, 0), trace);
     }
 

From 80171c70ec668485c034db6ffe66e14bdfc563ac Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Mon, 12 Feb 2024 23:56:02 +0530
Subject: [PATCH 8/8] Initialize correlation_id

---
 src/python_be.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python_be.cc b/src/python_be.cc
index d803fecb..f566adaa 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -362,7 +362,7 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     const char* id;
     RETURN_IF_ERROR(TRITONBACKEND_RequestId(request, &id));
 
-    uint64_t correlation_id;
+    uint64_t correlation_id = 0;
     const char* correlation_id_string = "";
 
     auto error = TRITONBACKEND_RequestCorrelationId(request, &correlation_id);