From 257a2a9b28af369ab4db69638d620485d227fa17 Mon Sep 17 00:00:00 2001
From: Mikhail Khludnev
Date: Thu, 15 Feb 2024 23:58:14 +0300
Subject: [PATCH] Demonstrate passing "max_tokens" param

Context: https://github.com/triton-inference-server/server/issues/6864
---
 samples/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/client.py b/samples/client.py
index 06bf0c3e..58af15a8 100755
--- a/samples/client.py
+++ b/samples/client.py
@@ -90,7 +90,7 @@ async def process_stream(self, prompts, sampling_parameters):
         self._results_dict[result.get_response().id].append(i)
 
     async def run(self):
-        sampling_parameters = {"temperature": "0.1", "top_p": "0.95"}
+        sampling_parameters = {"temperature": "0.1", "top_p": "0.95", "max_tokens": "100"}
         with open(self._flags.input_prompts, "r") as file:
             print(f"Loading inputs from `{self._flags.input_prompts}`...")
             prompts = file.readlines()
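
For reference, adding "max_tokens" to this dict works because the sample does not
send each sampling parameter as its own tensor: the whole dict is JSON-serialized
into a single "sampling_parameters" BYTES input, which the vLLM backend parses back
into its SamplingParams (where max_tokens caps the number of tokens generated per
request). Below is a minimal sketch of that serialization step; the tensor name
"sampling_parameters" and the one-element BYTES shape are assumptions based on the
sample's create_request helper, not a verified contract for every backend version.

import json

import numpy as np
import tritonclient.grpc.aio as grpcclient

# Sampling parameters as the patched sample builds them; values are plain
# strings, matching the sample's existing style, and the backend is assumed
# to coerce them to the numeric types vLLM expects.
sampling_parameters = {"temperature": "0.1", "top_p": "0.95", "max_tokens": "100"}

# Serialize the whole dict to JSON and wrap it in a one-element BYTES tensor.
# The input name "sampling_parameters" is an assumption taken from the sample;
# check your model's config if it differs.
payload = np.array([json.dumps(sampling_parameters).encode("utf-8")], dtype=np.object_)
params_input = grpcclient.InferInput("sampling_parameters", [1], "BYTES")
params_input.set_data_from_numpy(payload)

Since every parameter travels inside one JSON blob, new vLLM sampling options can
be exercised from the client without changing the model's input signature, which
is why this one-line patch is enough to demonstrate "max_tokens".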