diff --git a/docs/model_support.md b/docs/model_support.md
index fa0739128..25bc3d6e6 100644
--- a/docs/model_support.md
+++ b/docs/model_support.md
@@ -54,6 +54,8 @@
 - [OpenLemur/lemur-70b-chat-v1](https://huggingface.co/OpenLemur/lemur-70b-chat-v1)
 - [allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b)
 - [Microsoft/Orca-2-7b](https://huggingface.co/microsoft/Orca-2-7b)
+- [deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat)
+- [deepseek-ai/deepseek-coder-33b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct)
 - Any [EleutherAI](https://huggingface.co/EleutherAI) pythia model such as [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
 - Any [Peft](https://github.com/huggingface/peft) adapter trained on top of a model above. To activate, must have `peft` in the model path. Note: If
diff --git a/fastchat/conversation.py b/fastchat/conversation.py
index 9c8b57e13..b6ff0d93d 100644
--- a/fastchat/conversation.py
+++ b/fastchat/conversation.py
@@ -29,6 +29,7 @@ class SeparatorStyle(IntEnum):
     ROBIN = auto()
     FALCON_CHAT = auto()
     CHATGLM3 = auto()
+    DEEPSEEK_CHAT = auto()
 
 
 @dataclasses.dataclass
@@ -224,6 +225,15 @@ def get_prompt(self) -> str:
                     ret += role + ":"
 
             return ret
+        elif self.sep_style == SeparatorStyle.DEEPSEEK_CHAT:
+            seps = [self.sep, self.sep2]
+            ret = system_prompt
+            for i, (role, message) in enumerate(self.messages):
+                if message:
+                    ret += role + ": " + message + seps[i % 2]
+                else:
+                    ret += role + ":"
+            return ret
         else:
             raise ValueError(f"Invalid style: {self.sep_style}")
 
@@ -530,7 +540,7 @@ def get_conv_template(name: str) -> Conversation:
 # Deepseek code default template
 register_conv_template(
     Conversation(
-        name="deepseek",
+        name="deepseek-coder",
         system_template="You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.",
         roles=("### Instruction:", "### Response:"),
         sep="\n",
@@ -1265,6 +1275,20 @@ def get_conv_template(name: str) -> Conversation:
     )
 )
 
+# Deepseek-chat template
+# reference: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/blob/main/tokenizer_config.json
+register_conv_template(
+    Conversation(
+        name="deepseek-chat",
+        system_message="<|begin▁of▁sentence|>",  # must add a bos token before first message
+        roles=("User", "Assistant"),
+        sep_style=SeparatorStyle.DEEPSEEK_CHAT,
+        sep="\n\n",
+        sep2="<|end▁of▁sentence|>",
+        stop_str="<|end▁of▁sentence|>",
+    )
+)
+
 
 if __name__ == "__main__":
     from fastchat.conversation import get_conv_template
diff --git a/fastchat/model/model_adapter.py b/fastchat/model/model_adapter.py
index 3277bbe1a..1610738a0 100644
--- a/fastchat/model/model_adapter.py
+++ b/fastchat/model/model_adapter.py
@@ -1934,6 +1934,28 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
         return get_conv_template("Yi-34b-chat")
 
 
+class DeepseekCoderAdapter(BaseModelAdapter):
+    """The model adapter for deepseek-ai's coder models"""
+
+    def match(self, model_path: str):
+        return "deepseek-coder" in model_path.lower()
+
+    def get_default_conv_template(self, model_path: str) -> Conversation:
+        return get_conv_template("deepseek-coder")
+
+
+class DeepseekChatAdapter(BaseModelAdapter):
+    """The model adapter for deepseek-ai's chat models"""
+
+    # Note: that this model will require tokenizer version >= 0.13.3 because the tokenizer class is LlamaTokenizerFast
+
+    def match(self, model_path: str):
+        return "deepseek-llm" in model_path.lower() and "chat" in model_path.lower()
+
+    def get_default_conv_template(self, model_path: str) -> Conversation:
+        return get_conv_template("deepseek-chat")
+
+
 # Note: the registration order matters.
 # The one registered earlier has a higher matching priority.
 register_model_adapter(PeftModelAdapter)
@@ -2005,6 +2027,8 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
 register_model_adapter(PygmalionAdapter)
 register_model_adapter(MicrosoftOrcaAdapter)
 register_model_adapter(YiAdapter)
+register_model_adapter(DeepseekCoderAdapter)
+register_model_adapter(DeepseekChatAdapter)
 
 # After all adapters, try the default base adapter.
 register_model_adapter(BaseModelAdapter)
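For review purposes, here is a minimal sketch of how the new `deepseek-chat` template is expected to render once this patch is applied. It only uses FastChat's existing public helpers (`get_conversation_template`, `Conversation.append_message`, `Conversation.get_prompt`); the model path and messages are illustrative, not part of the change.

```python
# Sketch: resolve and render the new "deepseek-chat" template via the adapter added above.
from fastchat.model.model_adapter import get_conversation_template

# DeepseekChatAdapter.match() looks for both "deepseek-llm" and "chat" in the path,
# so this resolves to the "deepseek-chat" Conversation registered in conversation.py.
conv = get_conversation_template("deepseek-ai/deepseek-llm-67b-chat")

conv.append_message(conv.roles[0], "Who are you?")  # conv.roles[0] == "User"
conv.append_message(conv.roles[1], None)            # leave "Assistant" open for generation

print(conv.get_prompt())
# Expected, per the DEEPSEEK_CHAT separator style (newlines shown escaped):
# <|begin▁of▁sentence|>User: Who are you?\n\nAssistant:
```

Setting both `sep2` and `stop_str` to `<|end▁of▁sentence|>` should make each completed assistant turn end with the model's EOS marker and stop generation there, mirroring the chat format in the referenced tokenizer_config.json.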