From 8723c75e41ca320a36051ab3ce607d1e643689de Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:06:17 -0700 Subject: [PATCH] Add test for sending response after sending complete final flag --- .../response_sender_complete_final_test.py | 77 +++++++++++++++++++ qa/L0_backend_python/response_sender/test.sh | 31 ++++++++ .../config.pbtxt | 47 +++++++++++ .../response_sender_complete_final/model.py | 63 +++++++++++++++ 4 files changed, 218 insertions(+) create mode 100644 qa/L0_backend_python/response_sender/response_sender_complete_final_test.py create mode 100644 qa/python_models/response_sender_complete_final/config.pbtxt create mode 100644 qa/python_models/response_sender_complete_final/model.py diff --git a/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py b/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py new file mode 100644 index 0000000000..386a54e3d3 --- /dev/null +++ b/qa/L0_backend_python/response_sender/response_sender_complete_final_test.py @@ -0,0 +1,77 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import os +import time +import unittest + +import numpy as np +import tritonclient.grpc as grpcclient + + +class ResponseSenderTest(unittest.TestCase): + def _generate_streaming_callback_and_responses_pair(self): + responses = [] # [{"result": result, "error": error}, ...] 
+ + def callback(result, error): + responses.append({"result": result, "error": error}) + + return callback, responses + + def test_respond_after_complete_final(self): + with open(os.environ["SERVER_LOG"]) as f: + server_log = f.read() + self.assertNotIn("Test Passed", server_log) + + model_name = "response_sender_complete_final" + shape = [1, 1] + inputs = [grpcclient.InferInput("INPUT0", shape, "FP32")] + input0_np = np.array([[123.45]], np.float32) + inputs[0].set_data_from_numpy(input0_np) + + callback, responses = self._generate_streaming_callback_and_responses_pair() + with grpcclient.InferenceServerClient("localhost:8001") as client: + client.start_stream(callback) + client.async_stream_infer(model_name, inputs) + client.stop_stream() + + self.assertEqual(len(responses), 1) + for response in responses: + output0_np = response["result"].as_numpy(name="OUTPUT0") + self.assertTrue(np.allclose(input0_np, output0_np)) + self.assertIsNone(response["error"]) + + time.sleep(1) # make sure the logs are written before checking + with open(os.environ["SERVER_LOG"]) as f: + server_log = f.read() + self.assertNotIn("Unexpected request length", server_log) + self.assertNotIn("Expected exception not raised", server_log) + self.assertNotIn("Test FAILED", server_log) + self.assertIn("Test Passed", server_log) + + +if __name__ == "__main__": + unittest.main() diff --git a/qa/L0_backend_python/response_sender/test.sh b/qa/L0_backend_python/response_sender/test.sh index 33db46edbb..cca7e7acfa 100755 --- a/qa/L0_backend_python/response_sender/test.sh +++ b/qa/L0_backend_python/response_sender/test.sh @@ -97,6 +97,37 @@ set -e kill $SERVER_PID wait $SERVER_PID +# +# Test response sender to raise exception on response after complete final flag +# +rm -rf models && mkdir models +mkdir -p models/response_sender_complete_final/1 && \ + cp ../../python_models/response_sender_complete_final/model.py models/response_sender_complete_final/1 && \ + cp 
../../python_models/response_sender_complete_final/config.pbtxt models/response_sender_complete_final + +TEST_LOG="response_sender_complete_final_test.log" +SERVER_LOG="response_sender_complete_final_test.server.log" +SERVER_ARGS="--model-repository=${MODELDIR}/response_sender/models --backend-directory=${BACKEND_DIR} --log-verbose=1" + +run_server +if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 +fi + +set +e +SERVER_LOG=$SERVER_LOG python3 -m pytest --junitxml=concurrency_test.report.xml response_sender_complete_final_test.py > $TEST_LOG 2>&1 +if [ $? -ne 0 ]; then + echo -e "\n***\n*** response sender complete final test FAILED\n***" + cat $TEST_LOG + RET=1 +fi +set -e + +kill $SERVER_PID +wait $SERVER_PID + # # Test async response sender under decoupled / non-decoupled # diff --git a/qa/python_models/response_sender_complete_final/config.pbtxt b/qa/python_models/response_sender_complete_final/config.pbtxt new file mode 100644 index 0000000000..f08ed6da5b --- /dev/null +++ b/qa/python_models/response_sender_complete_final/config.pbtxt @@ -0,0 +1,47 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +backend: "python" +max_batch_size: 8 + +input [ + { + name: "INPUT0" + data_type: TYPE_FP32 + dims: [ -1 ] + } +] + +output [ + { + name: "OUTPUT0" + data_type: TYPE_FP32 + dims: [ -1 ] + } +] + +instance_group [{ kind: KIND_CPU }] +model_transaction_policy { decoupled: True } diff --git a/qa/python_models/response_sender_complete_final/model.py b/qa/python_models/response_sender_complete_final/model.py new file mode 100644 index 0000000000..e17f0b04f6 --- /dev/null +++ b/qa/python_models/response_sender_complete_final/model.py @@ -0,0 +1,63 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of NVIDIA CORPORATION nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    def execute(self, requests):
+        # Expect exactly one request per execute() call.
+        if len(requests) != 1:
+            pb_utils.Logger.log_error(f"Unexpected request length: {len(requests)}")
+            raise Exception("Test FAILED")
+
+        # Send a response with complete final flag, and then send another response
+        # and assert an exception is raised, for all requests.
+        for request in requests:
+            in_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
+            out_tensor = pb_utils.Tensor("OUTPUT0", in_tensor.as_numpy())
+            response = pb_utils.InferenceResponse([out_tensor])
+            response_sender = request.get_response_sender()
+            response_sender.send(
+                response, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL
+            )
+            test_passed = False
+            try:
+                response_sender.send(response)
+            except Exception as e:
+                pb_utils.Logger.log_info(f"Raised exception: {e}")
+                if (
+                    str(e)
+                    == "Unable to send response. Response sender has been closed."
+ ): + test_passed = True + finally: + if not test_passed: + pb_utils.Logger.log_error("Expected exception not raised") + raise Exception("Test FAILED") + pb_utils.Logger.log_info("Test Passed") + return None