From 257a2a9b28af369ab4db69638d620485d227fa17 Mon Sep 17 00:00:00 2001
From: Mikhail Khludnev
Date: Thu, 15 Feb 2024 23:58:14 +0300
Subject: [PATCH] Demonstrate passing "max_tokens" param

Context: https://github.com/triton-inference-server/server/issues/6864
---
 samples/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/client.py b/samples/client.py
index 06bf0c3e..58af15a8 100755
--- a/samples/client.py
+++ b/samples/client.py
@@ -90,7 +90,7 @@ async def process_stream(self, prompts, sampling_parameters):
         self._results_dict[result.get_response().id].append(i)
 
     async def run(self):
-        sampling_parameters = {"temperature": "0.1", "top_p": "0.95"}
+        sampling_parameters = {"temperature": "0.1", "top_p": "0.95", "max_tokens": "100"}
         with open(self._flags.input_prompts, "r") as file:
             print(f"Loading inputs from `{self._flags.input_prompts}`...")
             prompts = file.readlines()
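
For reference, adding "max_tokens" to this dict works because the sample does not
send each sampling parameter as its own tensor: the whole dict is JSON-serialized
into a single "sampling_parameters" BYTES input, which the vLLM backend parses back
into its SamplingParams (where max_tokens caps the number of tokens generated per
request). Below is a minimal sketch of that serialization step; the tensor name
"sampling_parameters" and the one-element BYTES shape are assumptions based on the
sample's create_request helper, not a verified contract for every backend version.

import json

import numpy as np
import tritonclient.grpc.aio as grpcclient

# Sampling parameters as the patched sample builds them; values are plain
# strings, matching the sample's existing style, and the backend is assumed
# to coerce them to the numeric types vLLM expects.
sampling_parameters = {"temperature": "0.1", "top_p": "0.95", "max_tokens": "100"}

# Serialize the whole dict to JSON and wrap it in a one-element BYTES tensor.
# The input name "sampling_parameters" is an assumption taken from the sample;
# check your model's config if it differs.
payload = np.array([json.dumps(sampling_parameters).encode("utf-8")], dtype=np.object_)
params_input = grpcclient.InferInput("sampling_parameters", [1], "BYTES")
params_input.set_data_from_numpy(payload)

Since every parameter travels inside one JSON blob, new vLLM sampling options can
be exercised from the client without changing the model's input signature, which
is why this one-line patch is enough to demonstrate "max_tokens".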