diff --git a/app/api/docker_control/views.py b/app/api/docker_control/views.py
index 2a6acb06..2cdc0f91 100644
--- a/app/api/docker_control/views.py
+++ b/app/api/docker_control/views.py
@@ -19,6 +19,7 @@
     perform_reset,
 )
 from shared_config.model_config import model_implmentations
+from shared_config.model_type_config import ModelTypes
 from .serializers import DeploymentSerializer, StopSerializer
 from shared_config.logger_config import get_logger
 
diff --git a/app/api/shared_config/model_config.py b/app/api/shared_config/model_config.py
index e33c3a33..27be6444 100644
--- a/app/api/shared_config/model_config.py
+++ b/app/api/shared_config/model_config.py
@@ -10,6 +10,7 @@
 from shared_config.device_config import DeviceConfigurations
 from shared_config.backend_config import backend_config
 from shared_config.setup_config import SetupTypes
+from shared_config.model_type_config import ModelTypes
 from shared_config.logger_config import get_logger
 
 logger = get_logger(__name__)
@@ -50,6 +51,7 @@ class ModelImpl:
     docker_config: Dict[str, Any]
     service_route: str
     setup_type: SetupTypes
+    model_type: ModelTypes
     hf_model_id: str = None
     model_name: str = None  # uses defaults based on hf_model_id
     model_id: str = None  # uses defaults based on hf_model_id
@@ -236,6 +238,7 @@ def base_docker_config():
         service_port=7000,
         service_route="/objdetection_v2",
         setup_type=SetupTypes.NO_SETUP,
+        model_type=ModelTypes.OBJECT_DETECTION
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
@@ -249,6 +252,7 @@ def base_docker_config():
         service_port=7000,
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.MAKE_VOLUMES,
+        model_type=ModelTypes.MOCK
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
@@ -261,6 +265,7 @@ def base_docker_config():
         service_route="/v1/chat/completions",
         env_file=os.environ.get("VLLM_LLAMA31_ENV_FILE"),
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.2-1B-Instruct",
@@ -270,6 +275,7 @@ def base_docker_config():
         docker_config=base_docker_config(),
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.2-3B-Instruct",
@@ -279,6 +285,7 @@ def base_docker_config():
         docker_config=base_docker_config(),
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
@@ -288,6 +295,7 @@ def base_docker_config():
         docker_config=base_docker_config(),
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
@@ -297,6 +305,7 @@ def base_docker_config():
         docker_config=base_docker_config(),
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
@@ -306,6 +315,7 @@ def base_docker_config():
         docker_config=base_docker_config(),
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     ModelImpl(
         hf_model_id="meta-llama/Llama-3.3-70B-Instruct",
@@ -315,6 +325,7 @@ def base_docker_config():
         docker_config=base_docker_config(),
         service_route="/v1/chat/completions",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
+        model_type=ModelTypes.CHAT
     ),
     #! Add new model vLLM model implementations here
 ]
diff --git a/app/api/shared_config/model_type_config.py b/app/api/shared_config/model_type_config.py
new file mode 100644
index 00000000..a63e697d
--- /dev/null
+++ b/app/api/shared_config/model_type_config.py
@@ -0,0 +1,7 @@
+from enum import Enum
+
+class ModelTypes(Enum):
+    MOCK = "mock"
+    CHAT = "chat"
+    OBJECT_DETECTION = "object_detection"
+    IMAGE_GENERATION = "image_generation"
\ No newline at end of file
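For context, a minimal sketch of how the new `model_type` field could be consumed once the tagged implementations are registered. The helper `implementations_of` is hypothetical and not part of this diff; it assumes only that each registered `ModelImpl` now carries a `model_type` attribute, and it treats `model_implmentations` as a mapping of implementation id to `ModelImpl`, which is an assumption about the surrounding module.

```python
# Hypothetical usage sketch, not part of this diff.
# Assumes model_implmentations maps an implementation id to a ModelImpl that
# carries the model_type field introduced above.
from shared_config.model_config import model_implmentations
from shared_config.model_type_config import ModelTypes


def implementations_of(model_type: ModelTypes):
    """Yield (impl_id, impl) pairs whose model_type matches the requested type."""
    for impl_id, impl in model_implmentations.items():
        if impl.model_type == model_type:
            yield impl_id, impl


# Example: list every chat-capable implementation.
for impl_id, impl in implementations_of(ModelTypes.CHAT):
    print(impl_id, impl.hf_model_id)
```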