From 86dfb56daba5052c260a2dd86d296309cfbd4324 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine
Date: Thu, 6 Jun 2024 13:09:50 -0700
Subject: [PATCH] tools: Respect tie_word_embeddings for qwen2 architecture

Some models like qwen2-0.5B use tied embeddings and it looks like all
models specify the value in the config.
---
 tools/convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/convert.py b/tools/convert.py
index dd01e53..c5995c8 100644
--- a/tools/convert.py
+++ b/tools/convert.py
@@ -355,7 +355,7 @@ def conv(t):
         tensors[f"model.layers.{l}.mlp.w3.weight"] = conv(weights[f"model.layers.{l}.mlp.up_proj.weight"])
     tensors["model.norm.weight"] = weights["model.norm.weight"].float()
 
-    if arch not in ["gemma", "minicpm", "cohere"] or config.get("tie_word_embeddings", None) == False:
+    if arch not in ["gemma", "minicpm", "cohere", "qwen2"] or config.get("tie_word_embeddings", None) == False:
         tensors["model.output.weight"] = conv(weights["lm_head.weight"])
 elif arch == "internlm2":
     tensors["model.embed.weight"] = conv(weights["model.tok_embeddings.weight"])
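
For context, a minimal sketch of the check the patched line performs, written as a
standalone helper. The function name needs_separate_output_weight and the standalone
form are assumptions for illustration; convert.py evaluates the condition inline
while mapping weights, and the == False comparison is kept to mirror the patched
line (None meaning the key is absent from config.json).

# Illustrative sketch, not part of the patch: decide whether a checkpoint is
# expected to carry a separate lm_head.weight, mirroring the condition above.
def needs_separate_output_weight(arch: str, config: dict) -> bool:
    # These architectures tie the output projection to the input embedding by
    # default; qwen2 is added because models such as qwen2-0.5B set
    # "tie_word_embeddings": true in config.json and ship no lm_head.weight.
    tied_by_default = ["gemma", "minicpm", "cohere", "qwen2"]
    # An explicit "tie_word_embeddings": false overrides the default, so
    # lm_head.weight is still converted into model.output.weight.
    return arch not in tied_by_default or config.get("tie_word_embeddings", None) == False


if __name__ == "__main__":
    # Tied case: no separate output tensor expected.
    print(needs_separate_output_weight("qwen2", {"tie_word_embeddings": True}))   # False
    # Untied case: lm_head.weight is expected and gets converted.
    print(needs_separate_output_weight("qwen2", {"tie_word_embeddings": False}))  # True
    # Architectures outside the tied-by-default list always keep lm_head.
    print(needs_separate_output_weight("llama", {}))                              # True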