From 49f4197a507fb728f78a7b243c05e82c1ee2c90a Mon Sep 17 00:00:00 2001
From: Sanjay Nadhavajhala <sanjay@acorn.io>
Date: Wed, 12 Jun 2024 14:15:07 -0700
Subject: [PATCH] do tool preprocessing properly

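Build the parsed conversation as before, but when the request carries
tools, dump their schemas and rewrite the raw messages with
preprocess_input() so the tool definitions reach the chat template.
Also drop a leftover debug print.
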
---
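Note for reviewers (text between the '---' and the diffstat is ignored
by git am): preprocess_input is provided elsewhere in this tree and is
assumed here to fold the tool schemas into the message list. The sketch
below is a minimal, self-contained stand-in -- the stub body, the sample
tool, and the sample messages are hypothetical; only the call shape
(msgs=..., tools=[t.model_dump() ...]) mirrors the patched code.

    import json
    from typing import Any, Dict, List

    def preprocess_input(msgs: List[Dict[str, Any]],
                         tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        # Hypothetical stand-in: prepend a system message listing the tools.
        header = {
            "role": "system",
            "content": "You may call these tools:\n" + json.dumps(tools),
        }
        return [header] + list(msgs)

    # Mirrors the new call site: rewrite only when tools are present.
    tools = [{"type": "function",
              "function": {"name": "get_weather",
                           "parameters": {"type": "object"}}}]
    msgs = [{"role": "user", "content": "What is the weather in Paris?"}]
    if tools:
        msgs = preprocess_input(msgs=msgs, tools=tools)
    assert msgs[0]["role"] == "system"

The branch fires for OpenAI-style requests that set "tools"; requests
without tools keep the conversation produced by message parsing.
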
 vllm/entrypoints/openai/serving_chat.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 711d27aa963ab..e1d3798baecbe 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -179,7 +179,6 @@ def _parse_chat_message_content(
     ) -> ChatMessageParseResult:
         role = message["role"]
         content = message.get("content")
-
         if content is None:
             return ChatMessageParseResult(messages=[], image_futures=[])
         if isinstance(content, str):
@@ -210,12 +209,6 @@ async def create_chat_completion(
         try:
             conversation: List[ConversationMessage] = []
             image_futures: List[Awaitable[ImagePixelData]] = []
-
-            raw_msgs = request.messages
-            if request.tools:
-                print("==================tools====================")
-                tools = [t.model_dump() for t in request.tools]
-                raw_msgs = preprocess_input(msgs=raw_msgs, tools=tools)
             
             for msg in request.messages:
                 chat_parsed_result = self._parse_chat_message_content(msg)
@@ -223,7 +216,12 @@ async def create_chat_completion(
                 conversation.extend(chat_parsed_result.messages)
                 image_futures.extend(chat_parsed_result.image_futures)
             
-            conversation = raw_msgs
+            # When the request carries tools, rewrite the raw messages with
+            # the tool schemas and use the result as the conversation.
+            if request.tools:
+                tools = [t.model_dump() for t in request.tools]
+                conversation = preprocess_input(msgs=request.messages,
+                                                tools=tools)
 
             prompt = self.tokenizer.apply_chat_template(
                 conversation=conversation,