diff --git a/Dockerfile b/Dockerfile index 8594c2a11846..ff38ec798b15 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ ARG TARGETARCH ARG TARGETVARIANT ENV DEBIAN_FRONTEND=noninteractive -ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" +ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh" RUN apt-get update && \ @@ -442,9 +442,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "vllm" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE ; fi && \ if [[ ( "${EXTRA_BACKENDS}" =~ "rerankers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ make -C backend/python/rerankers \ - ; fi && \ - if [[ ( "${EXTRA_BACKENDS}" =~ "mamba" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \ - make -C backend/python/mamba \ ; fi # Make sure the models directory exists diff --git a/Makefile b/Makefile index 312bfcc40942..6f14d046f82d 100644 --- a/Makefile +++ b/Makefile @@ -533,10 +533,10 @@ protogen-go-clean: $(RM) bin/* .PHONY: protogen-python -protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen +protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen .PHONY: protogen-python-clean -protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean +protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean .PHONY: autogptq-protogen autogptq-protogen: @@ -578,14 +578,6 @@ exllama2-protogen: exllama2-protogen-clean: $(MAKE) -C backend/python/exllama2 protogen-clean -.PHONY: mamba-protogen -mamba-protogen: - $(MAKE) -C backend/python/mamba protogen - -.PHONY: mamba-protogen-clean -mamba-protogen-clean: - $(MAKE) -C backend/python/mamba protogen-clean - .PHONY: rerankers-protogen rerankers-protogen: $(MAKE) -C backend/python/rerankers protogen @@ -642,7 +634,6 @@ prepare-extra-conda-environments: protogen-python $(MAKE) -C backend/python/coqui $(MAKE) -C backend/python/diffusers $(MAKE) -C backend/python/vllm - $(MAKE) -C backend/python/mamba $(MAKE) -C backend/python/rerankers $(MAKE) -C backend/python/transformers $(MAKE) -C backend/python/parler-tts diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 9b65c6db2ea3..b0d5875bde2f 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -21,7 +21,7 @@ XPU=os.environ.get("XPU", "0") == "1" -from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria +from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria, MambaConfig, MambaForCausalLM from transformers import AutoProcessor, MusicgenForConditionalGeneration from scipy.io import wavfile import outetts @@ -245,6 +245,10 @@ def LoadModel(self, request, context): autoTokenizer = False self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode) self.SentenceTransformer = True + elif request.Type == "Mamba": + autoTokenizer = False + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + self.model = MambaForCausalLM.from_pretrained(model_name) else: print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go index b2a5293bdd95..d5f1459b7636 100644 --- a/pkg/model/initializers.go +++ b/pkg/model/initializers.go @@ -29,12 +29,14 @@ var Aliases map[string]string = map[string]string{ "langchain-huggingface": LCHuggingFaceBackend, "transformers-musicgen": TransformersBackend, "sentencetransformers": TransformersBackend, + "mamba": TransformersBackend, "stablediffusion": StableDiffusionGGMLBackend, } var TypeAlias map[string]string = map[string]string{ "sentencetransformers": "SentenceTransformer", "huggingface-embeddings": "SentenceTransformer", + "mamba": "Mamba", "transformers-musicgen": "MusicgenForConditionalGeneration", }