Commit
Merge pull request #154 from stanford-crfm/jonathan/0216-weekly-assets
add form response assets + add weekly assets
rishibommasani authored Mar 29, 2024
2 parents e4ef79d + ecd2a32 commit e978cd2
Showing 39 changed files with 1,080 additions and 12 deletions.
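All of the files touched below follow the same asset schema: each entry is a YAML mapping with fields such as type, name, organization, description, created_date, url, modality, size, dependencies, access, license, intended_uses, prohibited_uses, monitoring, and feedback. As a rough sanity check of that structure, a reviewer could run something like the sketch below; the required-field list is inferred from the entries in this diff and the script itself is illustrative, not part of the repository.

```python
# Illustrative sketch only: load an assets/*.yaml file and flag entries that
# lack the fields shared by every entry in this commit. The field list is
# inferred from the diff below; it is not an official schema.
import sys
import yaml  # pip install pyyaml

SHARED_FIELDS = {
    "type", "name", "organization", "description", "created_date", "url",
    "modality", "size", "dependencies", "access", "license",
    "intended_uses", "prohibited_uses", "monitoring", "feedback",
}

def check_asset_file(path: str) -> None:
    with open(path) as f:
        entries = yaml.safe_load(f) or []
    for entry in entries:
        missing = SHARED_FIELDS - entry.keys()
        if missing:
            print(f"{path}: entry '{entry.get('name', '?')}' is missing {sorted(missing)}")

if __name__ == "__main__":
    for path in sys.argv[1:]:
        check_asset_file(path)
```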
21 changes: 21 additions & 0 deletions assets/ai21.yaml
@@ -295,3 +295,24 @@
monthly_active_users: unknown
user_distribution: unknown
failures: unknown
- type: model
name: Jamba
organization: AI21 Labs
description: Jamba is a state-of-the-art hybrid SSM-Transformer LLM and the world’s first production-grade Mamba-based model.
created_date: 2024-03-28
url: https://www.ai21.com/blog/announcing-jamba
model_card: https://huggingface.co/ai21labs/Jamba-v0.1
modality: text; text
analysis: Jamba outperforms or matches other state-of-the-art models in its size class on a wide range of benchmarks.
size: 52B parameters (sparse)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: Apache 2.0
intended_uses: intended for use as a foundation layer for fine-tuning and training
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/ai21labs/Jamba-v0.1/discussions
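For orientation, the Jamba checkpoint named in the model_card above can in principle be loaded through Hugging Face transformers. The snippet below is a hedged sketch: the repo id comes from the model card, but the trust_remote_code flag, device placement, and memory requirements are assumptions that depend on the transformers version and available hardware.

```python
# Sketch: load ai21labs/Jamba-v0.1 (the model_card above) and generate text.
# Assumptions: trust_remote_code is needed for the custom Jamba/Mamba code,
# and the 52B (MoE) checkpoint fits across the available GPUs via device_map.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ai21labs/Jamba-v0.1")
model = AutoModelForCausalLM.from_pretrained(
    "ai21labs/Jamba-v0.1",
    trust_remote_code=True,  # assumption: custom modeling code not yet upstreamed
    device_map="auto",       # requires the accelerate package
)

inputs = tokenizer("The key idea behind state space models is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```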
21 changes: 21 additions & 0 deletions assets/baai.yaml
@@ -144,3 +144,24 @@
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/BAAI/bge-m3/discussions
- type: model
name: EVA-CLIP
organization: Beijing Academy of Artificial Intelligence, Tsinghua University
description: As of release, EVA-CLIP is the largest and most powerful open-source CLIP model, with 18 billion parameters.
created_date: 2024-02-06
url: https://arxiv.org/pdf/2402.04252.pdf
model_card: https://huggingface.co/BAAI/EVA-CLIP-8B-448
modality: image, text; text
analysis: Evaluated on zero-shot classification performance across multiple image classification benchmarks.
size: 18B parameters (dense)
dependencies: [CLIP]
training_emissions: unknown
training_time: unknown
training_hardware: 384 A100 40GB GPUs
quality_control: ''
access: open
license: MIT
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/BAAI/EVA-CLIP-8B-448/discussions
22 changes: 22 additions & 0 deletions assets/beitech.yaml
@@ -22,3 +22,25 @@
prohibited_uses: ''
monitoring: none
feedback: Feedback can be sent to authors via [email protected]
- type: model
name: MiniMA
organization: Beijing Institute of Technology
description: MiniMA is a smaller, fine-tuned Llama 2 model adapted for Chinese.
created_date: 2023-11-13
url: https://github.com/GeneZC/MiniMA
model_card: https://huggingface.co/GeneZC/MiniMA-3B
modality: text; text
analysis: Evaluated on standard benchmarks including MMLU, CEval, and DROP.
size: 3B parameters (dense)
dependencies: [Llama 2]
training_emissions: unknown
training_time: unknown
training_hardware: 8 A100 80G GPUs
quality_control: ''
access: open
license: Llama 2
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/GeneZC/MiniMA-3B/discussions

24 changes: 24 additions & 0 deletions assets/causallm.yaml
@@ -0,0 +1,24 @@
---
- type: model
name: CausalLM
organization: CausalLM
description: CausalLM is an LLM based on the model weights of Qwen and trained with a model architecture identical to that of LLaMA 2.
created_date: 2023-10-21
url: https://huggingface.co/CausalLM/14B
model_card: https://huggingface.co/CausalLM/14B
modality: text; text
analysis: Evaluated on standard benchmarks across a range of tasks.
size: 14B parameters (dense)
dependencies: [Qwen, OpenOrca, Open Platypus]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license:
explanation: can be found at https://github.com/rpherrera/WTFPL (HuggingFace lists this as the license)
value: WTFPL
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: none
42 changes: 42 additions & 0 deletions assets/cerebras.yaml
@@ -114,3 +114,45 @@
high stakes.
monitoring: unknown
feedback: none
- type: model
name: Bittensor Language Model
organization: Cerebras
description: Bittensor Language Model is a 3 billion parameter language model with an 8k context length trained on 627B tokens of SlimPajama.
created_date: 2023-07-24
url: https://www.cerebras.net/blog/btlm-3b-8k-7b-performance-in-a-3-billion-parameter-model/
model_card: https://huggingface.co/cerebras/btlm-3b-8k-base
modality: text; text
analysis: Evaluated on standard LLM benchmarks in comparison to similar-sized models.
size: 3B parameters (dense)
dependencies: [SlimPajama]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/cerebras/btlm-3b-8k-base/discussions
- type: dataset
name: SlimPajama
organization: Cerebras
description: As of release, SlimPajama is the largest extensively deduplicated, multi-corpora, open-source dataset for training large language models.
created_date: 2023-06-09
url: https://huggingface.co/datasets/cerebras/SlimPajama-627B
datasheet: https://huggingface.co/datasets/cerebras/SlimPajama-627B
modality: text
size: 627B tokens
sample: []
analysis: ''
dependencies: [RedPajama-Data]
included: ''
excluded: ''
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/datasets/cerebras/SlimPajama-627B/discussions
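Given its 627B-token size, SlimPajama is usually consumed in streaming mode rather than downloaded outright. A minimal sketch with the Hugging Face datasets library follows; the repo id comes from the datasheet link above, while the "text" field name is an assumption based on the dataset card rather than this diff.

```python
# Sketch: stream a few SlimPajama documents without materializing the corpus on disk.
# The "text" field name is an assumption; inspect the first record to confirm the schema.
from datasets import load_dataset

ds = load_dataset("cerebras/SlimPajama-627B", split="train", streaming=True)

for i, example in enumerate(ds):
    print(example["text"][:200])
    if i == 2:
        break
```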
23 changes: 22 additions & 1 deletion assets/cmu.yaml
@@ -1,7 +1,7 @@
---
- type: model
name: PolyCoder
organization: CMU
organization: Carnegie Mellon University
description: PolyCoder is a code model trained on 2.7B parameters based on the
GPT-2 architecture, which was trained on 249GB of code across 12 programming
languages on a single machine.
@@ -31,3 +31,24 @@
prohibited_uses: None
monitoring: None
feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion
- type: model
name: Moment
organization: Carnegie Mellon University, University of Pennsylvania
description: Moment is a family of open-source foundation models for general-purpose time-series analysis.
created_date: 2024-02-06
url: https://arxiv.org/pdf/2402.03885.pdf
model_card: none
modality: ''
analysis: Evaluated on nascent time-series datasets and benchmarks.
size: 385M parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: Single A6000 GPU
quality_control: ''
access: open
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: none
48 changes: 48 additions & 0 deletions assets/cognitive.yaml
@@ -0,0 +1,48 @@
---
- type: model
name: Dolphin 2.2 Yi
organization: Cognitive Computations
description: Dolphin 2.2 Yi is an LLM based on Yi.
created_date: 2023-11-14
url: https://erichartford.com/dolphin
model_card: https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b
modality: text; text
analysis: none
size: 34B parameters (dense)
dependencies: [Dolphin, Yi]
training_emissions: unknown
training_time: 3 days
training_hardware: 4 A100 GPUs
quality_control: ''
access: open
license:
explanation: can be found at https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/blob/main/LICENSE
value: custom
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/discussions
- type: model
name: WizardLM Uncensored
organization: Cognitive Computations
description: WizardLM Uncensored is WizardLM trained on a subset of its dataset, with responses that contained alignment or moralizing removed.
created_date:
explanation: release date is not published; estimated to be sometime in either May or June 2023.
value: 2023-06-01
url: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored
model_card: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored
modality: text; text
analysis: Evaluated on OpenLLM leaderboard.
size: 30B parameters (dense)
dependencies: [WizardLM]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored/discussions

42 changes: 42 additions & 0 deletions assets/cohere.yaml
@@ -474,6 +474,27 @@
prohibited_uses: ''
monitoring: none
feedback: https://huggingface.co/Cohere/Cohere-embed-english-v3.0/discussions
- type: model
name: Aya
organization: Cohere for AI, Cohere, Brown University, Carnegie Mellon University, MIT
description: Aya is a massively multilingual generative language model that follows instructions in 101 languages, over 50% of which are considered lower-resourced.
created_date: 2024-02-12
url: https://arxiv.org/pdf/2402.07827.pdf
model_card: https://huggingface.co/CohereForAI/aya-101
modality: text; text
analysis: Evaluated on standard LLM and multilingual benchmarks in comparison to SotA models.
size: 13B parameters (dense)
dependencies: [mT5, Aya Dataset]
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/CohereForAI/aya-101/discussions
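Because Aya is built on mT5 (see dependencies), it is a sequence-to-sequence model rather than a decoder-only LLM, so it is loaded with the seq2seq classes in transformers. The sketch below uses the repo id from the model_card above; the prompt wording and generation settings are illustrative assumptions.

```python
# Sketch: multilingual instruction following with Aya (mT5-based, hence seq2seq classes).
# Repo id taken from the model_card above; prompt wording is an illustrative assumption.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("CohereForAI/aya-101")
model = AutoModelForSeq2SeqLM.from_pretrained("CohereForAI/aya-101", device_map="auto")

inputs = tokenizer("Translate to Turkish: How are you today?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```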
- type: model
name: Command-R
organization: Cohere
@@ -495,4 +516,25 @@
prohibited_uses: ''
monitoring: ''
feedback: https://huggingface.co/CohereForAI/c4ai-command-r-v01/discussions
- type: dataset
name: Aya Dataset
organization: Cohere for AI, Beijing Academy of Artificial Intelligence, Cohere, Binghamton University
description: The Aya Dataset consists of original, human-curated prompt-completion pairs written by fluent speakers of 65 languages.
created_date: 2024-02-09
url: https://arxiv.org/pdf/2402.06619.pdf
datasheet: https://huggingface.co/datasets/CohereForAI/aya_dataset
modality: text
size: 204k human-annotated prompt-completion pairs
sample: []
analysis: unknown
dependencies: []
included: ''
excluded: ''
quality_control: ''
access: open
license: Apache 2.0
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/datasets/CohereForAI/aya_dataset/discussions
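The companion Aya Dataset can be pulled straight from the Hub with the datasets library. A minimal sketch follows; the repo id comes from the datasheet above, and the inputs/targets column names are assumptions taken from the dataset card rather than this diff.

```python
# Sketch: inspect a few Aya Dataset prompt-completion pairs.
# The "inputs"/"targets" column names are assumptions; print the first row to confirm.
from datasets import load_dataset

aya = load_dataset("CohereForAI/aya_dataset", split="train")
for row in aya.select(range(3)):
    print(row["inputs"], "->", row["targets"])
```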

22 changes: 22 additions & 0 deletions assets/deepnight.yaml
@@ -0,0 +1,22 @@
---
- type: model
name: SaiLY
organization: Deepnight Research
description: SaiLy is a series of highly experimental and uncensored AI models by Deepnight Research.
created_date: 2023-11-04
url: https://huggingface.co/deepnight-research/saily_100b
model_card: https://huggingface.co/deepnight-research/saily_100b
modality: text; text
analysis: none
size: 100B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: open
license: MIT
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/deepnight-research/saily_100b/discussions
26 changes: 25 additions & 1 deletion assets/deepseek.yaml
@@ -4,7 +4,7 @@
organization: Deepseek AI
description: Deepseek is a 67B parameter model with Grouped-Query Attention trained
on 2 trillion tokens from scratch.
created_date: 2023-11-29
created_date: 2023-11-28
url: https://github.com/deepseek-ai/DeepSeek-LLM
model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base
modality: text; text
@@ -51,3 +51,27 @@
prohibited_uses: none
monitoring: unknown
feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/discussions
- type: model
name: Deepseek Coder
organization: Deepseek AI
description: Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.
created_date: 2023-11-03
url: https://github.com/deepseek-ai/DeepSeek-Coder
model_card: https://huggingface.co/deepseek-ai/deepseek-coder-33b-base
modality: text; code
analysis: Evaluated on code generation, code completion, cross-file code completion, and program-based math reasoning across standard benchmarks.
size: 33B parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: 8 NVIDIA A100 GPUs and 8 NVIDIA H800 GPUs
quality_control: ''
access: open
license:
explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-Coder/blob/main/LICENSE-MODEL. Code license is under MIT
value: custom
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: https://huggingface.co/deepseek-ai/deepseek-coder-33b-base/discussions
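As a base (non-chat) code model, the Deepseek Coder checkpoint above is typically used for plain code completion. The sketch below shows the general shape of such a call; the repo id comes from the model_card, while trust_remote_code, bfloat16, and the multi-GPU device_map are assumptions about what a 33B checkpoint needs.

```python
# Sketch: code completion with deepseek-ai/deepseek-coder-33b-base (model_card above).
# Assumptions: trust_remote_code is required and the model is sharded across GPUs.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "deepseek-ai/deepseek-coder-33b-base", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "deepseek-ai/deepseek-coder-33b-base",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

prompt = "# write a quicksort function in python\ndef quicksort(arr):"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```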

2 changes: 1 addition & 1 deletion assets/dibt.yaml
@@ -16,7 +16,7 @@
quality_control: ''
access: open
license: unknown
intended_uses: Training and evaluating language models on prompt ranking tasks and as a dataset that can be filtered only to include high-quality prompts. These can serve as seed data for generating synthetic prompts and generations.
prohibited_uses: This dataset only contains rankings for prompts, not prompt/response pairs so it is not suitable for direct use for supervised fine-tuning of language models.
monitoring: ''
feedback: https://huggingface.co/datasets/DIBT/10k_prompts_ranked/discussions
2 changes: 1 addition & 1 deletion assets/epfl.yaml
@@ -1,7 +1,7 @@
---
- type: model
name: MediTron
organization: EPFL, Idiap Research Institute, Open Assistant, Yale
organization: EPFL, Idiap Research Institute, OpenAssistant, Yale
description: Meditron is a large-scale medical LLM that remains open-source.
created_date: 2023-11-27
url: https://arxiv.org/pdf/2311.16079.pdf
22 changes: 22 additions & 0 deletions assets/google.yaml
@@ -1732,6 +1732,27 @@
monitoring: Google internal monitoring
feedback: none
- type: model
name: TimesFM
organization: Google
description: TimesFM is a single forecasting model pre-trained on a large time-series corpus of 100 billion real-world time points.
created_date: 2024-02-02
url: https://blog.research.google/2024/02/a-decoder-only-foundation-model-for.html
model_card: none
modality: ''
analysis: Evaluated on popular time-series benchmarks.
size: 200M parameters (dense)
dependencies: []
training_emissions: unknown
training_time: unknown
training_hardware: unknown
quality_control: ''
access: closed
license: unknown
intended_uses: ''
prohibited_uses: ''
monitoring: unknown
feedback: none
- type: model
name: Gemma
organization: Google
description: Gemma is a family of lightweight, state-of-the-art open models from Google, based on the Gemini models. They are text-to-text, decoder-only large language models, available in English.
@@ -1754,3 +1775,4 @@
prohibited_uses: Prohibited uses are specified in the Gemma Prohibited Use Policy here https://ai.google.dev/gemma/prohibited_use_policy
monitoring: ''
feedback: https://huggingface.co/google/gemma-7b/discussions
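The Gemma checkpoints referenced above use the standard decoder-only interface in transformers. A brief, hedged sketch follows; the repo id is taken from the feedback link above, and access assumes the Gemma license has been accepted on the Hub and the user is authenticated, since the repo is gated.

```python
# Sketch: text generation with google/gemma-7b (the repo behind the feedback link above).
# Assumption: the gated license has been accepted and `huggingface-cli login` has been run.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto")

inputs = tokenizer("Write a haiku about open models.", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=48)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```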
