From ea52411c222d746a7be7fe17fc1a30d8c40d27aa Mon Sep 17 00:00:00 2001
From: Aleksandr Movchan
Date: Tue, 20 Feb 2024 16:34:39 +0000
Subject: [PATCH] Add chat_template and enforce_eager options to docstring of VLLMConfig

---
 aana/deployments/vllm_deployment.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/aana/deployments/vllm_deployment.py b/aana/deployments/vllm_deployment.py
index 7c200116..c4f0acad 100644
--- a/aana/deployments/vllm_deployment.py
+++ b/aana/deployments/vllm_deployment.py
@@ -34,6 +34,9 @@ class VLLMConfig(BaseModel):
         gpu_memory_reserved (float): the GPU memory reserved for the model in mb
         default_sampling_params (SamplingParams): the default sampling parameters.
         max_model_len (int): the maximum generated text length in tokens (optional, default: None)
+        chat_template (str): the name of the chat template; if not provided, the chat template
+            from the model will be used, but note that some models do not have one (optional, default: None)
+        enforce_eager (bool): whether to enforce eager execution (optional, default: False)
     """
 
     model: str
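
For reference, a minimal sketch of how the new options might be set when constructing the config. The field names come from the docstring above; the model identifier, the chat template name, and the assumption that the remaining fields are optional are illustrative and not part of this patch.

from aana.deployments.vllm_deployment import VLLMConfig

# Hypothetical configuration; values are illustrative only.
config = VLLMConfig(
    model="TheBloke/Llama-2-7b-Chat-AWQ",  # hypothetical model identifier
    gpu_memory_reserved=10000,             # GPU memory reserved for the model, in MB
    chat_template="llama2",                # assumed template name; overrides the model's own template
    enforce_eager=True,                    # run eagerly instead of capturing CUDA graphs
)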