diff --git a/tests/mock_vllm_api_server.py b/tests/mock_vllm_api_server.py index 0dad192..e7ec6d3 100644 --- a/tests/mock_vllm_api_server.py +++ b/tests/mock_vllm_api_server.py @@ -12,6 +12,7 @@ from vllm import ModelRegistry # import classes to mock +# TODO: import logging_init_wrapper from vllm-tt-metal-llama3-70b/src/logging_utils.py after refactor from vllm.worker.tt_worker import TTWorker, TTCacheEngine from mock_vllm_model import ( new_init_cache_enginer, diff --git a/tests/mock_vllm_offline_inference_tt.py b/tests/mock_vllm_offline_inference_tt.py index df2e508..4ecbdc9 100644 --- a/tests/mock_vllm_offline_inference_tt.py +++ b/tests/mock_vllm_offline_inference_tt.py @@ -2,8 +2,9 @@ import json import time from unittest.mock import patch - import uvloop + +# TODO: import logging_init_wrapper from vllm-tt-metal-llama3-70b/src/logging_utils.py after refactor from mock_vllm_model import ( MockModel, new_allocate_kv_cache,