From 7e12d6475178deb4e289af3f1369820e2b1ac479 Mon Sep 17 00:00:00 2001 From: Yu Shi Jie Date: Sat, 21 Dec 2024 05:36:33 -0500 Subject: [PATCH] Add Mistral-Large-Instruct-2411 (#1876) --- litgpt/config.py | 20 ++++++++++++++++++++ tutorials/download_model_weights.md | 1 + 2 files changed, 21 insertions(+) diff --git a/litgpt/config.py b/litgpt/config.py index 475f017e50..577a2f3335 100644 --- a/litgpt/config.py +++ b/litgpt/config.py @@ -1663,6 +1663,26 @@ def norm_class(self) -> Type: intermediate_size=28672, ) ) +configs.append( + # https://huggingface.co/mistralai/Mistral-Large-Instruct-2411/blob/main/config.json + dict( + name="Mistral-Large-Instruct-2411", + hf_config=dict(org="mistralai", name="Mistral-Large-Instruct-2411"), + padded_vocab_size=32768, + block_size=32768, + n_layer=88, + n_head=96, + n_embd=12288, + n_query_groups=8, + rotary_percentage=1.0, + parallel_residual=False, + bias=False, + norm_class_name="RMSNorm", + norm_eps=1e-05, + mlp_class_name="LLaMAMLP", + intermediate_size=28672, + ) +) ############ diff --git a/tutorials/download_model_weights.md b/tutorials/download_model_weights.md index 876db1916a..bd46a3564f 100644 --- a/tutorials/download_model_weights.md +++ b/tutorials/download_model_weights.md @@ -171,6 +171,7 @@ mistralai/Mistral-7B-Instruct-v0.3 mistralai/Mistral-7B-v0.1 mistralai/Mistral-7B-v0.3 mistralai/Mistral-Large-Instruct-2407 +mistralai/Mistral-Large-Instruct-2411 mistralai/Mixtral-8x7B-Instruct-v0.1 mistralai/Mixtral-8x7B-v0.1 mistralai/Mixtral-8x22B-Instruct-v0.1