Commit

Merge pull request #161 from JustinLin610/add_qwen_moe
Add Qwen 1.5 MoE
rishibommasani authored Apr 9, 2024
2 parents 51f6f62 + 04b18e8 commit 96d00b9
Showing 1 changed file with 33 additions and 3 deletions.
36 changes: 33 additions & 3 deletions assets/alibaba.yaml
@@ -87,16 +87,17 @@
 
 - type: model
   name: Qwen 1.5
-  organization: Qwen AI
+  organization: Qwen Team
   description: Qwen 1.5 is the next iteration in their Qwen series, consisting of
     Transformer-based large language models pretrained on a large volume of data,
     including web texts, books, codes, etc.
   created_date: 2024-02-04
   url: https://qwenlm.github.io/blog/qwen1.5/
   model_card: https://huggingface.co/Qwen/Qwen1.5-72B
   modality: text; text
-  analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU,
-    all standard English and Chinese benchmarks.
+  analysis: Base models are evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP,
+    BBH, CMMLU, all standard English and Chinese benchmarks, and chat models are
+    evaluated on Chatbot Arena, AlpacaEval, MT-Bench, etc.
   size: 72B parameters (dense)
   dependencies: []
   training_emissions: unknown
@@ -111,3 +112,32 @@
   prohibited_uses: ''
   monitoring: unknown
   feedback: https://huggingface.co/Qwen/Qwen1.5-72B/discussions
+
+- type: model
+  name: Qwen 1.5 MoE
+  organization: Qwen Team
+  description: Qwen 1.5 is the next iteration in their Qwen series, consisting of
+    Transformer-based large language models pretrained on a large volume of data,
+    including web texts, books, codes, etc. Qwen 1.5 MoE is the MoE model of the
+    Qwen 1.5 series.
+  created_date: 2024-03-28
+  url: https://qwenlm.github.io/blog/qwen-moe/
+  model_card: https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B
+  modality: text; text
+  analysis: Base models are evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP,
+    BBH, CMMLU, all standard English and Chinese benchmarks, and chat models are
+    evaluated on Chatbot Arena, AlpacaEval, MT-Bench, etc.
+  size: 14B parameters with 2.7B parameters for activation (MoE)
+  dependencies: []
+  training_emissions: unknown
+  training_time: unknown
+  training_hardware: unknown
+  quality_control: unknown
+  access: open
+  license:
+    explanation: Model license can be found at https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B/blob/main/LICENSE
+    value: custom
+  intended_uses: ''
+  prohibited_uses: ''
+  monitoring: unknown
+  feedback: https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B/discussions
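The added block reuses the same field schema as the existing Qwen 1.5 entry. A minimal sketch of how such an entry could be checked for completeness before merging, assuming only the field names visible in this diff (`missing_fields` and the parsed-dict form are hypothetical illustrations, not part of the repository):

```python
# Hypothetical validation sketch: the field list comes from the diff above;
# the helper itself is not part of the repository's tooling.
REQUIRED_FIELDS = [
    "type", "name", "organization", "description", "created_date",
    "url", "model_card", "modality", "analysis", "size", "dependencies",
    "training_emissions", "training_time", "training_hardware",
    "quality_control", "access", "license", "intended_uses",
    "prohibited_uses", "monitoring", "feedback",
]

def missing_fields(entry: dict) -> list:
    """Return schema fields absent from a parsed model entry."""
    return [field for field in REQUIRED_FIELDS if field not in entry]

# The Qwen 1.5 MoE entry roughly as it would look after YAML parsing
# (values abbreviated where the diff wraps them across lines).
qwen_moe = {
    "type": "model",
    "name": "Qwen 1.5 MoE",
    "organization": "Qwen Team",
    "description": "Qwen 1.5 MoE is the MoE model of the Qwen 1.5 series.",
    "created_date": "2024-03-28",
    "url": "https://qwenlm.github.io/blog/qwen-moe/",
    "model_card": "https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B",
    "modality": "text; text",
    "analysis": "Evaluated on standard English and Chinese benchmarks.",
    "size": "14B parameters with 2.7B parameters for activation (MoE)",
    "dependencies": [],
    "training_emissions": "unknown",
    "training_time": "unknown",
    "training_hardware": "unknown",
    "quality_control": "unknown",
    "access": "open",
    "license": {"value": "custom"},
    "intended_uses": "",
    "prohibited_uses": "",
    "monitoring": "unknown",
    "feedback": "https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B/discussions",
}

print(missing_fields(qwen_moe))  # → []
```

A check like this would catch a dropped key (e.g. an entry missing `license`) before it reaches review.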
