Skip to content

Commit

Permalink
Add deepseek chat (#2760)
Browse files Browse the repository at this point in the history
  • Loading branch information
BabyChouSr authored Dec 1, 2023
1 parent 686ab04 commit decceed
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/model_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@
- [OpenLemur/lemur-70b-chat-v1](https://huggingface.co/OpenLemur/lemur-70b-chat-v1)
- [allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b)
- [Microsoft/Orca-2-7b](https://huggingface.co/microsoft/Orca-2-7b)
- [deepseek-ai/deepseek-llm-67b-chat](https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat)
- [deepseek-ai/deepseek-coder-33b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct)
- Any [EleutherAI](https://huggingface.co/EleutherAI) pythia model such as [pythia-6.9b](https://huggingface.co/EleutherAI/pythia-6.9b)
- Any [Peft](https://github.com/huggingface/peft) adapter trained on top of a
model above. To activate, must have `peft` in the model path. Note: If
Expand Down
26 changes: 25 additions & 1 deletion fastchat/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class SeparatorStyle(IntEnum):
ROBIN = auto()
FALCON_CHAT = auto()
CHATGLM3 = auto()
DEEPSEEK_CHAT = auto()


@dataclasses.dataclass
Expand Down Expand Up @@ -224,6 +225,15 @@ def get_prompt(self) -> str:
ret += role + ":"

return ret
elif self.sep_style == SeparatorStyle.DEEPSEEK_CHAT:
seps = [self.sep, self.sep2]
ret = system_prompt
for i, (role, message) in enumerate(self.messages):
if message:
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
return ret
else:
raise ValueError(f"Invalid style: {self.sep_style}")

Expand Down Expand Up @@ -530,7 +540,7 @@ def get_conv_template(name: str) -> Conversation:
# Deepseek code default template
register_conv_template(
Conversation(
name="deepseek",
name="deepseek-coder",
system_template="You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.",
roles=("### Instruction:", "### Response:"),
sep="\n",
Expand Down Expand Up @@ -1265,6 +1275,20 @@ def get_conv_template(name: str) -> Conversation:
)
)

# Deepseek-chat template
# reference: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/blob/main/tokenizer_config.json
register_conv_template(
    Conversation(
        name="deepseek-chat",
        # The BOS token is carried in the system message so that every rendered
        # prompt begins with it, matching the upstream chat template.
        system_message="<|begin▁of▁sentence|>",  # must add a bos token before first message
        roles=("User", "Assistant"),
        sep_style=SeparatorStyle.DEEPSEEK_CHAT,
        # DEEPSEEK_CHAT alternates separators per turn: `sep` ("\n\n") follows
        # a User message, `sep2` (the EOS token) follows an Assistant message.
        sep="\n\n",
        sep2="<|end▁of▁sentence|>",
        stop_str="<|end▁of▁sentence|>",
    )
)

if __name__ == "__main__":
from fastchat.conversation import get_conv_template

Expand Down
24 changes: 24 additions & 0 deletions fastchat/model/model_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1934,6 +1934,28 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
return get_conv_template("Yi-34b-chat")


class DeepseekCoderAdapter(BaseModelAdapter):
    """Model adapter for deepseek-ai coder checkpoints (e.g. deepseek-coder-33b-instruct)."""

    def match(self, model_path: str):
        # Case-insensitive substring match on the model path.
        lowered = model_path.lower()
        return "deepseek-coder" in lowered

    def get_default_conv_template(self, model_path: str) -> Conversation:
        # Every coder variant shares the "deepseek-coder" conversation template.
        return get_conv_template("deepseek-coder")


class DeepseekChatAdapter(BaseModelAdapter):
    """Model adapter for deepseek-ai chat checkpoints (e.g. deepseek-llm-67b-chat)."""

    # NOTE: this model requires tokenizers >= 0.13.3 because its tokenizer
    # class is LlamaTokenizerFast.

    def match(self, model_path: str):
        # Both markers must appear (case-insensitively) in the model path.
        lowered = model_path.lower()
        return all(tag in lowered for tag in ("deepseek-llm", "chat"))

    def get_default_conv_template(self, model_path: str) -> Conversation:
        return get_conv_template("deepseek-chat")


# Note: the registration order matters.
# The one registered earlier has a higher matching priority.
register_model_adapter(PeftModelAdapter)
Expand Down Expand Up @@ -2005,6 +2027,8 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
register_model_adapter(PygmalionAdapter)
register_model_adapter(MicrosoftOrcaAdapter)
register_model_adapter(YiAdapter)
register_model_adapter(DeepseekCoderAdapter)
register_model_adapter(DeepseekChatAdapter)

# After all adapters, try the default base adapter.
register_model_adapter(BaseModelAdapter)

0 comments on commit decceed

Please sign in to comment.