From d1394a5afe80ec67d059a69f02c0526122359039 Mon Sep 17 00:00:00 2001
From: nnshah1
Date: Tue, 9 Jan 2024 14:54:47 -0800
Subject: [PATCH] updated naming and example

---
 python/tritonserver/__init__.py       |  1 +
 python/tritonserver/_api/_response.py | 11 +++++-----
 python/tritonserver/_api/_tensor.py   | 31 ++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/python/tritonserver/__init__.py b/python/tritonserver/__init__.py
index a83e84ef2..c2d977b82 100644
--- a/python/tritonserver/__init__.py
+++ b/python/tritonserver/__init__.py
@@ -50,6 +50,7 @@
 from tritonserver._api._allocators import (
     default_memory_allocators as default_memory_allocators,
 )
+from tritonserver._api._dlpack import DLDeviceType as DLDeviceType
 from tritonserver._api._model import Model as Model
 from tritonserver._api._model import ModelBatchFlag as ModelBatchFlag
 from tritonserver._api._model import ModelTxnPropertyFlag as ModelTxnPropertyFlag
diff --git a/python/tritonserver/_api/_response.py b/python/tritonserver/_api/_response.py
index 99fc6e50b..dc9c53c9e 100644
--- a/python/tritonserver/_api/_response.py
+++ b/python/tritonserver/_api/_response.py
@@ -397,7 +397,7 @@ class InferenceResponse:
     final: bool = False
 
     @staticmethod
-    def _from_TRITONSERVER_InferenceResponse(
+    def _from_tritonserver_inference_response(
         model: _model.Model,
         server: TRITONSERVER_Server,
         request: TRITONSERVER_InferenceRequest,
@@ -435,10 +435,10 @@ def _from_TRITONSERVER_InferenceResponse(
                 name,
                 data_type,
                 shape,
-                data_ptr,
-                byte_size,
-                memory_type,
-                memory_type_id,
+                _data_ptr,
+                _byte_size,
+                _memory_type,
+                _memory_type_id,
                 memory_buffer,
             ) = response.output(output_index)
             tensor = Tensor(data_type, shape, memory_buffer)
@@ -450,6 +450,7 @@
             error.args += tuple(asdict(result).items())
             result.error = error
 
+        # TODO: support classification
         # values["classification_label"] = response.output_classification_label()
 
         return result
diff --git a/python/tritonserver/_api/_tensor.py b/python/tritonserver/_api/_tensor.py
index 505e8e410..92df1d104 100644
--- a/python/tritonserver/_api/_tensor.py
+++ b/python/tritonserver/_api/_tensor.py
@@ -143,7 +143,8 @@ def __dlpack__(self, *, stream=None):
         Any
             A DLPack-compatible object representing the tensor.
         """
-
+        # TODO: Handle the stream argument correctly
+        #
         # if not (stream is None or (isinstance(stream, int) and stream == 0)):
         #     raise UnsupportedError(
         #         f"DLPack stream synchronization on {stream} not currently supported"
         #     )
@@ -364,6 +365,34 @@ def to_device(self, device: DeviceOrMemoryType) -> Tensor:
         -------
         Tensor
             The tensor moved to the specified device.
+
+        Examples
+        --------
+
+        tensor_cpu = tritonserver.Tensor.from_dlpack(numpy.array([0, 1, 2], dtype=numpy.float16))
+
+        # Different ways to specify the device
+
+        tensor_gpu = tensor_cpu.to_device(MemoryType.GPU)
+
+        tensor_gpu = tensor_cpu.to_device((MemoryType.GPU, 0))
+
+        tensor_gpu = tensor_cpu.to_device((DLDeviceType.kDLCUDA, 0))
+
+        tensor_gpu = tensor_cpu.to_device("gpu")
+
+        tensor_gpu = tensor_cpu.to_device("gpu:0")
+
+        ndarray_gpu = cupy.from_dlpack(tensor_gpu)
+
+        ndarray_gpu[0] = ndarray_gpu.mean()
+
+        tensor_cpu = tensor_gpu.to_device("cpu")
+
+        ndarray_cpu = numpy.from_dlpack(tensor_cpu)
+
+        assert ndarray_cpu[0] == ndarray_gpu[0]
+
         """
         memory_type, memory_type_id = parse_device_or_memory_type(device)
         if self.memory_type == memory_type and self.memory_type_id == memory_type_id:
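
A minimal usage sketch of the API this patch documents, not part of the patch itself: it moves a tensor to GPU 0 with to_device and consumes it through DLPack, using the same names as the new docstring example. It assumes cupy, a CUDA-enabled build, and that MemoryType is exported from the tritonserver package; because __dlpack__ does not yet handle the consumer's stream argument (see the TODO added above), the sketch synchronizes the current CuPy stream explicitly before touching the data.

import cupy
import numpy
import tritonserver
from tritonserver import MemoryType

# Build a small CPU tensor and move it to GPU 0.
tensor_cpu = tritonserver.Tensor.from_dlpack(numpy.array([0, 1, 2], dtype=numpy.float16))
tensor_gpu = tensor_cpu.to_device((MemoryType.GPU, 0))

# Import into CuPy through the DLPack protocol.
ndarray_gpu = cupy.from_dlpack(tensor_gpu)

# Stream handling is still a TODO in __dlpack__, so synchronize before
# reading or writing on a stream the producer does not know about.
cupy.cuda.get_current_stream().synchronize()
ndarray_gpu[0] = ndarray_gpu.mean()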