From 96cece2f3317f334cda23ebf98df230fbeaa5be1 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Fri, 6 Dec 2024 00:27:24 -0800
Subject: [PATCH] Update L0_check_health_vllm engine failure mock

Drop the mock_AsyncLLMEngine subclass that used to be swapped into the
backend's model.py. The simulated failure is now produced by appending a
check_health() override to engine/multiprocessing/client.py inside the
installed vLLM package; unmocking restores that file from a backup.

---
Note: the post-image blob id on the test.sh index line was not recomputed
for the hardened mock/unmock helpers below; it is kept for reference only.

 .../mock_async_llm_engine.py                  | 36 -------------------
 ci/L0_check_health_vllm/test.sh               | 36 +++++++++++++++----
 2 files changed, 30 insertions(+), 42 deletions(-)
 delete mode 100644 ci/L0_check_health_vllm/mock_async_llm_engine.py

diff --git a/ci/L0_check_health_vllm/mock_async_llm_engine.py b/ci/L0_check_health_vllm/mock_async_llm_engine.py
deleted file mode 100644
index d8d9f038..00000000
--- a/ci/L0_check_health_vllm/mock_async_llm_engine.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from vllm.engine.async_llm_engine import AsyncLLMEngine as real_AsyncLLMEngine
-
-
-class mock_AsyncLLMEngine(real_AsyncLLMEngine):
-    _mock_check_health_count = 0
-
-    async def check_health(self) -> None:
-        self._mock_check_health_count += 1
-        if self._mock_check_health_count > 1:
-            raise RuntimeError("Simulated vLLM check_health() failure")
diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh
index 9c3b4eec..50c1a097 100755
--- a/ci/L0_check_health_vllm/test.sh
+++ b/ci/L0_check_health_vllm/test.sh
@@ -47,16 +47,40 @@ function enable_health_check {
     echo -e "}" >> models/vllm_opt/config.pbtxt
 }
 
+# Directory of the installed vLLM package. Defaults to the Python 3.12
+# dist-packages location; export VLLM_INSTALL_PATH to point elsewhere.
+VLLM_INSTALL_PATH="${VLLM_INSTALL_PATH:-/usr/local/lib/python3.12/dist-packages/vllm}"
+# Module that receives the failing check_health() override.
+VLLM_MQ_CLIENT_PY="$VLLM_INSTALL_PATH/engine/multiprocessing/client.py"
+
 function mock_vllm_async_llm_engine {
-    mv /opt/tritonserver/backends/vllm/model.py /opt/tritonserver/backends/vllm/.model.py.backup
-    cp /opt/tritonserver/backends/vllm/.model.py.backup /opt/tritonserver/backends/vllm/model.py
-    sed -i 's/from vllm.engine.async_llm_engine import AsyncLLMEngine/from mock_async_llm_engine import mock_AsyncLLMEngine as AsyncLLMEngine/' /opt/tritonserver/backends/vllm/model.py
-    cp mock_async_llm_engine.py /opt/tritonserver/backends/vllm
+    # Back up the pristine file once; an existing backup is never overwritten,
+    # so calling this twice cannot replace the original with a mocked copy.
+    if [ ! -f "$VLLM_MQ_CLIENT_PY.backup" ]; then
+        cp -p "$VLLM_MQ_CLIENT_PY" "$VLLM_MQ_CLIENT_PY.backup"
+    fi
+    # Always start from the pristine contents before appending the override.
+    cp -p "$VLLM_MQ_CLIENT_PY.backup" "$VLLM_MQ_CLIENT_PY"
+    # Append a check_health() that succeeds on the first call and raises on
+    # every later one. The mutable default argument is deliberate: it keeps
+    # the call count between invocations.
+    # NOTE(review): appending at end-of-file assumes the last class defined in
+    # client.py is the engine client whose check_health() is called - confirm.
+    cat >> "$VLLM_MQ_CLIENT_PY" <<'EOF'
+
+    async def check_health(self, check_count=[0]):
+        check_count[0] += 1
+        if check_count[0] > 1:
+            raise RuntimeError("Simulated vLLM check_health() failure")
+EOF
 }
 
 function unmock_vllm_async_llm_engine {
-    rm -f /opt/tritonserver/backends/vllm/mock_async_llm_engine.py /opt/tritonserver/backends/vllm/model.py
-    mv /opt/tritonserver/backends/vllm/.model.py.backup /opt/tritonserver/backends/vllm/model.py
+    # Restore only when a backup exists, so a stray call cannot delete the real
+    # client.py and leave the vLLM installation broken.
+    if [ -f "$VLLM_MQ_CLIENT_PY.backup" ]; then
+        mv -f "$VLLM_MQ_CLIENT_PY.backup" "$VLLM_MQ_CLIENT_PY"
+    fi
 }
 
 function test_check_health {