From ea52411c222d746a7be7fe17fc1a30d8c40d27aa Mon Sep 17 00:00:00 2001
From: Aleksandr Movchan
Date: Tue, 20 Feb 2024 16:34:39 +0000
Subject: [PATCH] Add chat_template and enforce_eager options to docstring of VLLMConfig

---
 aana/deployments/vllm_deployment.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/aana/deployments/vllm_deployment.py b/aana/deployments/vllm_deployment.py
index 7c200116..c4f0acad 100644
--- a/aana/deployments/vllm_deployment.py
+++ b/aana/deployments/vllm_deployment.py
@@ -34,6 +34,9 @@ class VLLMConfig(BaseModel):
         gpu_memory_reserved (float): the GPU memory reserved for the model in mb
         default_sampling_params (SamplingParams): the default sampling parameters.
         max_model_len (int): the maximum generated text length in tokens (optional, default: None)
+        chat_template (str): the name of the chat template; if not provided, the chat template
+            from the model will be used, but note that some models do not have one (optional, default: None)
+        enforce_eager (bool): whether to enforce eager execution (optional, default: False)
     """
 
     model: str
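
For reference, a minimal sketch of how the new options might be set when constructing the config. The field names come from the docstring above; the model identifier, the chat template name, and the assumption that the remaining fields are optional are illustrative and not part of this patch.

from aana.deployments.vllm_deployment import VLLMConfig

# Hypothetical configuration; values are illustrative only.
config = VLLMConfig(
    model="TheBloke/Llama-2-7b-Chat-AWQ",  # hypothetical model identifier
    gpu_memory_reserved=10000,             # GPU memory reserved for the model, in MB
    chat_template="llama2",                # assumed template name; overrides the model's own template
    enforce_eager=True,                    # run eagerly instead of capturing CUDA graphs
)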