diff --git a/clemcore/__init__.py b/clemcore/__init__.py index 51c120e812..af503075a1 100644 --- a/clemcore/__init__.py +++ b/clemcore/__init__.py @@ -1,15 +1,17 @@ """ Main entry point """ import textwrap -from typing import List, Dict +from typing import List, Dict, Union import os.path import logging import logging.config import yaml from datetime import datetime +import json import clemcore.backends as backends import clemcore.clemgame as clemgame import clemcore.utils.file_utils as file_utils +from clemcore.clemgame import GameSpec BANNER = \ r""" @@ -59,11 +61,11 @@ def list_games(): print(game_name, wrapper.fill(game["description"])) -def run(game_name: str, model_specs: List[backends.ModelSpec], gen_args: Dict, +def run(game: Union[str, Dict, GameSpec], model_specs: List[backends.ModelSpec], gen_args: Dict, experiment_name: str = None, instances_name: str = None, results_dir: str = None): """Run specific model/models with a specified clemgame. Args: - game_name: Name of the game, matching the game's name in the game registry. + game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec. model_specs: A list of backends.ModelSpec instances for the player models to run the game with. gen_args: Text generation parameters for the backend; output length and temperature are implemented for the majority of model backends. @@ -78,72 +80,77 @@ def run(game_name: str, model_specs: List[backends.ModelSpec], gen_args: Dict, model.set_gen_args(**gen_args) # todo make this somehow available in generate method? player_models.append(model) - game_spec = clemgame.select_game(game_name) - game = clemgame.load_game(game_spec, instances_name=instances_name) - logger.info(f'Running {game_spec["game_name"]} (models={player_models if player_models is not None else "see experiment configs"})') - stdout_logger.info(f"Running game {game_spec['game_name']}") - if experiment_name: - logger.info("Only running experiment: %s", experiment_name) - game.filter_experiment.append(experiment_name) - time_start = datetime.now() - game.run(player_models=player_models, results_dir=results_dir) - time_end = datetime.now() - logger.info(f'Running {game_spec["game_name"]} took {str(time_end - time_start)}') + game_specs = clemgame.select_game(game) + print("Matched game specs in registry:", " ".join([game_spec.game_name for game_spec in game_specs])) + for game_spec in game_specs: + game_benchmark = clemgame.load_game(game_spec, instances_name=instances_name) + logger.info( + f'Running {game_spec["game_name"]} (models={player_models if player_models is not None else "see experiment configs"})') + stdout_logger.info(f"Running game {game_spec['game_name']}") + if experiment_name: # leaving this as-is for now, needs discussion conclusions + logger.info("Only running experiment: %s", experiment_name) + game_benchmark.filter_experiment.append(experiment_name) + time_start = datetime.now() + game_benchmark.run(player_models=player_models, results_dir=results_dir) + time_end = datetime.now() + logger.info(f'Running {game_spec["game_name"]} took {str(time_end - time_start)}') + except Exception as e: stdout_logger.exception(e) logger.error(e, exc_info=True) -def score(game_name: str, experiment_name: str = None, results_dir: str = None): +def score(game: Union[str, Dict, GameSpec], experiment_name: str = None, results_dir: str = None): """Calculate scores from a game benchmark run's records and store score files. 
Args: - game_name: Name of the game, matching the game's name in the game registry. + game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec. experiment_name: Name of the experiment to score. Corresponds to the experiment directory in each player pair subdirectory in the results directory. results_dir: Path to the results directory in which the benchmark records are stored. """ - logger.info(f"Scoring game {game_name}") - stdout_logger.info(f"Scoring game {game_name}") + logger.info(f"Scoring game {game}") + stdout_logger.info(f"Scoring game {game}") if experiment_name: logger.info("Only scoring experiment: %s", experiment_name) - game_spec = clemgame.select_game(game_name) - try: - game = clemgame.load_game(game_spec, do_setup=False) - if experiment_name: - game.filter_experiment.append(experiment_name) - time_start = datetime.now() - game.compute_scores(results_dir) - time_end = datetime.now() - logger.info(f"Scoring {game.game_name} took {str(time_end - time_start)}") - except Exception as e: - stdout_logger.exception(e) - logger.error(e, exc_info=True) - - -def transcripts(game_name: str, experiment_name: str = None, results_dir: str = None): + game_specs = clemgame.select_game(game) + for game_spec in game_specs: + try: + game = clemgame.load_game(game_spec, do_setup=False) + if experiment_name: + game.filter_experiment.append(experiment_name) + time_start = datetime.now() + game.compute_scores(results_dir) + time_end = datetime.now() + logger.info(f"Scoring {game.game_name} took {str(time_end - time_start)}") + except Exception as e: + stdout_logger.exception(e) + logger.error(e, exc_info=True) + + +def transcripts(game: Union[str, Dict, GameSpec], experiment_name: str = None, results_dir: str = None): """Create episode transcripts from a game benchmark run's records and store transcript files. Args: - game_name: Name of the game, matching the game's name in the game registry. + game: Name of the game, matching the game's name in the game registry, OR GameSpec-like dict, OR GameSpec. experiment_name: Name of the experiment to score. Corresponds to the experiment directory in each player pair subdirectory in the results directory. results_dir: Path to the results directory in which the benchmark records are stored. 
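For illustration, the widened game parameter shown above accepts three shapes, and run, score, and transcripts all follow the same pattern. A minimal usage sketch: only the Union[str, Dict, GameSpec] signature is taken from this diff; the ModelSpec.from_name helper, the GameSpec constructor, and the gen_args keys are assumptions for illustration.

import clemcore
from clemcore.backends import ModelSpec
from clemcore.clemgame import GameSpec

model_specs = [ModelSpec.from_name("gpt-4o-mini-2024-07-18")]  # assumed helper
gen_args = {"temperature": 0.0, "max_tokens": 100}  # keys assumed

clemcore.run("taboo", model_specs, gen_args)                      # by registry name
clemcore.run({"game_name": "taboo"}, model_specs, gen_args)       # by GameSpec-like dict
clemcore.run(GameSpec(game_name="taboo"), model_specs, gen_args)  # by GameSpec (constructor assumed)

Because select_game now returns a list of matching specs, a single call can run, score, or transcribe several registered games in sequence.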
""" - logger.info(f"Transcribing game {game_name}") - stdout_logger.info(f"Transcribing game {game_name}") + logger.info(f"Transcribing game {game}") + stdout_logger.info(f"Transcribing game {game}") if experiment_name: logger.info("Only transcribing experiment: %s", experiment_name) - game_spec = clemgame.select_game(game_name) - try: - game = clemgame.load_game(game_spec, do_setup=False) - if experiment_name: - game.filter_experiment.append(experiment_name) - time_start = datetime.now() - game.build_transcripts(results_dir) - time_end = datetime.now() - logger.info(f"Building transcripts for {game.game_name} took {str(time_end - time_start)}") - except Exception as e: - stdout_logger.exception(e) - logger.error(e, exc_info=True) - + game_specs = clemgame.select_game(game) + for game_spec in game_specs: + try: + game = clemgame.load_game(game_spec, do_setup=False) + if experiment_name: + game.filter_experiment.append(experiment_name) + time_start = datetime.now() + game.build_transcripts(results_dir) + time_end = datetime.now() + logger.info(f"Building transcripts for {game.game_name} took {str(time_end - time_start)}") + except Exception as e: + stdout_logger.exception(e) + logger.error(e, exc_info=True) diff --git a/clemcore/backends/huggingface_multimodal_api.py b/clemcore/backends/huggingface_multimodal_api.py index f59d378f05..601ad11553 100644 --- a/clemcore/backends/huggingface_multimodal_api.py +++ b/clemcore/backends/huggingface_multimodal_api.py @@ -1,44 +1,76 @@ -"""Backend using HuggingFace transformers for open-weight multimodal models.""" - -import logging +""" +Backend using HuggingFace transformers for open-weight multimodal models. +""" from typing import List, Dict, Tuple, Any import torch +import clemcore.backends as backends from PIL import Image import requests -from transformers import AutoProcessor, AutoModelForVision2Seq, IdeficsForVisionText2Text, AutoConfig +from transformers import AutoTokenizer, AutoConfig from jinja2 import Template - -import clemcore.backends as backends - -# Define a map to load model from transformers Auto Classes -# IdeficsForVisionText2Text is not yet supported by any Auto Class -MODEL_TYPE_MAP = { - "Idefics": IdeficsForVisionText2Text, - "Vision2Seq": AutoModelForVision2Seq -} - +import warnings +import importlib +import logging FALLBACK_CONTEXT_SIZE = 256 logger = logging.getLogger(__name__) - def get_context_limit(model_spec: backends.ModelSpec) -> int: - """Get the context limit of the model. + """ + Get the context limit of the model. + Args: - model_spec: Contains definitions about the model to be used. + model_spec (backends.ModelSpec): Contains definitions/args for the model. + Returns: - Context limit of the model. + int: Context limit of the model. + + Raises: + Warning: If no context limit is found, a warning is raised and the fallback value is used. 
""" hf_model_str = model_spec['huggingface_id'] - model_config = AutoConfig.from_pretrained(hf_model_str) - - # Some models have 'max_position_embeddings' others have - 'max_sequence_length' - if hasattr(model_config, "text_config"): - context = model_config.text_config.max_position_embeddings - elif hasattr(model_config, "max_sequence_length"): - context = model_config.max_sequence_length + if 'trust_remote_code' in model_spec: + model_config = AutoConfig.from_pretrained(hf_model_str, trust_remote_code=True) else: + model_config = AutoConfig.from_pretrained(hf_model_str) + + def find_context_limit(config) -> int: + """Recursively search for max_sequence_length or max_position_embeddings.""" + # Check if the desired keys are directly in the config + if hasattr(config, 'max_position_embeddings'): + return config.max_position_embeddings + if hasattr(config, 'max_sequence_length'): + return config.max_sequence_length + + # Recursively search through the attributes of the config object + for attr in dir(config): + # Skip callable attributes and private attributes + if attr.startswith('_') or callable(getattr(config, attr)): + continue + + value = getattr(config, attr) + if isinstance(value, dict): + result = find_context_limit(value) + if result is not None: + return result + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + result = find_context_limit(item) + if result is not None: + return result + elif hasattr(value, '__dict__'): # Check if the value is an object with attributes + result = find_context_limit(value) + if result is not None: + return result + return None + + context = find_context_limit(model_config) + + if context is None: + warnings.warn(f"No context limit found for model - {hf_model_str}. Using fallback value: {FALLBACK_CONTEXT_SIZE}.") context = FALLBACK_CONTEXT_SIZE + logger.info(f"Context limit for model - {hf_model_str} is {context}") return context @@ -46,191 +78,130 @@ def get_context_limit(model_spec: backends.ModelSpec) -> int: def check_context_limit(context_size: int, prompt_tokens: list, max_new_tokens: int = 100) -> Tuple[ bool, int, int, int]: - """External context limit check. + """ + Checks if the context limit is exceeded. + Args: - context_size: max_sequence_length/max_position_embeddings of the model - prompt_tokens: List of prompt token IDs. - max_new_tokens: How many tokens to generate ('at most', but no stop sequence is defined). + context_size (int): The maximum sequence length or position embeddings of the model. + prompt_tokens (list): A list of prompt token IDs. + max_new_tokens (int, optional): The maximum number of tokens to generate. Defaults to 100. + Returns: - Tuple with - Bool: True if context limit is not exceeded, False if too many tokens - Number of tokens for the given messages and maximum new tokens - Number of tokens of 'context space left' - Total context token limit + Tuple[bool, int, int, int]: A tuple containing: + - bool: True if the context limit is not exceeded, False if too many tokens. + - int: The total number of tokens used (prompt + new tokens). + - int: The number of tokens of 'context space left'. + - int: The total context token limit. 
""" prompt_size = len(prompt_tokens) - tokens_used = prompt_size + max_new_tokens # context includes tokens to be generated + tokens_used = prompt_size + max_new_tokens tokens_left = context_size - tokens_used fits = tokens_used <= context_size return fits, tokens_used, tokens_left, context_size +def import_method(method_path: str): + """Import the method from the specified module path. -def load_processor(model_spec: backends.ModelSpec) -> AutoProcessor: - """Load processor from AutoProcessor a specific model. - Example: LlavaProcessor Args: - model_spec: A dictionary that defines the model to be used, loaded from Model Registry + model_type_str (str): The method path separated by dots. Example - transformers.AutoModel or backends.multimodal_utils.device_map + Returns: - Processor for the specific model. + type: The imported method. + + Raises: + ImportError: If the method cannot be imported. """ - hf_model_str = model_spec['huggingface_id'] # Get the model name + try: + module_path, method_name = method_path.rsplit('.', 1) + module = importlib.import_module(module_path) + return getattr(module, method_name) + except (ImportError, AttributeError) as e: + raise ImportError(f"Could not import method '{method_name}' from module '{module_path}'.") from e - if hasattr(model_spec, 'not_fast'): - # Only used by LLaVA 1.6 34B (Throws mismatch token error when use_fast is not set to False) - processor = AutoProcessor.from_pretrained(hf_model_str, use_fast=False, device_map="auto", verbose=False) - else: - processor = AutoProcessor.from_pretrained(hf_model_str, device_map="auto", verbose=False) - logger.info(f'Loading Processor for model : {model_spec.model_name}') - return processor +def load_processor(model_spec: backends.ModelSpec): + """ + Load processor from AutoProcessor/AutoTokenizer for a specific model (Example - LlavaProcessor). -def load_model(model_spec: backends.ModelSpec) -> Any: - """Load a specific model. Args: - model_spec: A dictionary that defines the model to be used, loaded from Model Registry + model_spec (backends.ModelSpec): A dictionary that defines the model to be used, loaded from Model Registry. + Returns: - The specific model. + Processor/Tokenizer for the specific model. + + Raises: + ImportError: If the processor type cannot be imported. 
""" - logger.info(f'Start loading huggingface model weights: {model_spec.model_name}') hf_model_str = model_spec['huggingface_id'] # Get the model name + processor_class_str = model_spec['processor_class'] # Processor type - AutoProcessor/AutoTokenizer + processor_config = model_spec['processor_config'] # Processor kwargs - model_type = MODEL_TYPE_MAP[model_spec['model_type']] # Use the appropriate Auto class to load the model + processor_class = import_method(processor_class_str) - model = model_type.from_pretrained(hf_model_str, device_map="auto", torch_dtype="auto") # Load the model + if "trust_remote_code" in model_spec: + processor = processor_class.from_pretrained(hf_model_str, trust_remote_code=True, **processor_config) # Load the processor with trust_remote_code=True + else: + processor = processor_class.from_pretrained(hf_model_str, **processor_config) # Load the processor with defined args - # check if model's generation_config has pad_token_id set: - if not model.generation_config.pad_token_id: - # set pad_token_id to tokenizer's eos_token_id to prevent excessive warnings: - model.generation_config.pad_token_id = model.generation_config.eos_token_id # Same as processor.tokenizer.pad_token_id + logger.info(f'Loading Processor for model : {model_spec.model_name}') - logger.info(f"Finished loading huggingface model: {model_spec.model_name}") - logger.info(f"Device Map: {model.hf_device_map}") + return processor - return model +def load_model(model_spec: backends.ModelSpec): + """ + Load a specific model. -def load_image(image: str) -> Image: - """Load an image based on a given local path or URL. Args: - image: Image path/url. + model_spec (backends.ModelSpec): A dictionary that defines the model to be used, loaded from Model Registry. + Returns: - The loaded PIL Image. + backends.Model: The specific model. + + Raises: + ImportError: If the model class or device map (if custom) cannot be imported. """ - if image.startswith('http') or image.startswith('https'): - image = Image.open(requests.get(image, stream=True).raw).convert('RGB') + logger.info(f'Start loading huggingface model weights: {model_spec.model_name}') + hf_model_str = model_spec['huggingface_id'] # Get the model name + model_class_str = model_spec['model_class'] # Model Loader Class + model_config = model_spec['model_config'] # Model kwargs + + model_class = import_method(model_class_str) + + # Check if a custom device_map split is provided and adjust device_map accordingly + if 'device_map' in model_config and not model_config['device_map'] == 'auto': + logger.info(f"Loading Custom device map for model: {hf_model_str}") + split_model = import_method(model_config['device_map']) + device_map = split_model(model_spec['model_name']) + model_config['device_map'] = device_map + + if 'trust_remote_code' in model_spec: + model = model_class.from_pretrained(hf_model_str, trust_remote_code=True, **model_config) # Load the model using from_pretrained else: - image = Image.open(image).convert('RGB') - - return image + model = model_class.from_pretrained(hf_model_str, **model_config) # Load the model using from_pretrained + # Check if model's generation_config has pad_token_id set: + if not model.generation_config.pad_token_id: + # Set pad_token_id to tokenizer's eos_token_id to prevent excessive warnings: + model.generation_config.pad_token_id = model.generation_config.eos_token_id # Same as processor.tokenizer.pad_token_id -def get_images(messages: list[Dict]) -> list: - """Return loaded images from messages. 
- Args: - messages: A list of messages passed to the model. - Returns: - A list of PIL Image objects. - """ - # Collect image links/file locations mentioned in messages - images = [] - for message in messages: - if 'image' in message: - if type(message['image']) == list: - for img in message['image']: - images.append(img) - else: - images.append(message['image']) - - # Return None if no image is passed - # Use AutoTokenizer to generate output and not AutoProcessor, as only text is passed. - if not images: - return None - - # Load Images - loaded_images = [] - for img in images: - image = load_image(img) - loaded_images.append(image) + logger.info(f"Finished loading huggingface model: {model_spec.model_name}") + logger.info(f"Device Map: {model.hf_device_map}") - return loaded_images + return model -# Separate Input and Output generation for Idefics -# Input is required for context check -def generate_idefics_input(messages: list[Dict]) -> Tuple[list, str]: - """Return inputs specific to the format of Idefics. - Args: - messages: A list[Dict] type object passed to the backend containing 'role', 'content' and 'image'. - Returns: - Tuple of the Idefics input list and the Idefics input text. +def check_multiple_image(messages: List[Dict]): """ - # Create a list containing the prompt text and images specific to Idefics input - # Refer - https://huggingface.co/HuggingFaceM4/idefics-80b-instruct - - # Use idefics_input as is for input to the model - # Use idefics_text, that contains everything from idefics_input, apart from image_urls/loaded_image, used for context check - idefics_input = [] - idefics_text = "" - for m in messages: - if m['role'] == 'user': - idefics_input.append('\nUser: ' + m['content']) - idefics_text += 'User: ' + m['content'] - if 'image' in m.keys(): - if type(m['image']) == list: # Check if multiple images are passed, append accordingly - for im in m['image']: - loaded_im = load_image(im) - idefics_input.append(loaded_im) - else: - idefics_input.append(m['image']) - idefics_input.append('') - idefics_text += '' - elif m['role'] == 'assistant': - idefics_input.append('\nAssistant: ' + m['content']) - idefics_input.append('') - idefics_text += '\nAssistant: ' + m['content'] - idefics_text += '' - idefics_input.append('\nAssistant:') - idefics_input = [idefics_input] - - return idefics_input, idefics_text - - -def generate_idefics_output(messages: list[Dict], - model: IdeficsForVisionText2Text, - processor: AutoProcessor, - max_tokens: int, - device) -> list[str]: - """Return generated text from Idefics model. - Args: - messages: A list[Dict] type object passed to the backend containing 'role', 'content' and 'image'. - model: Idefics model. - processor: Idefics processor. - max_tokens: The maximum number of tokens to generate. - device: Processing device - cuda/CPU. - Returns: - The generated text as a list of strings. - """ - idefics_input, _ = generate_idefics_input(messages=messages) - inputs = processor(idefics_input, add_end_of_utterance_token=False, return_tensors="pt").to(device) - - # Generation args for Idefics - exit_condition = processor.tokenizer("", add_special_tokens=False).input_ids - bad_words_ids = processor.tokenizer(["", ""], add_special_tokens=False).input_ids - - generated_ids = model.generate(**inputs, eos_token_id=exit_condition, bad_words_ids=bad_words_ids, - max_new_tokens=max_tokens) - generated_text = processor.batch_decode(generated_ids) + Check if a single message contains multiple images. 
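For reference, the message shape this helper inspects, with a condensed equivalent of the loop in check_multiple_image that follows (image paths are illustrative):

messages = [
    {"role": "user", "content": "Compare these two boards.",
     "image": ["imgs/board_a.png", "imgs/board_b.png"]},  # two images in one turn
    {"role": "assistant", "content": "The second board has more pieces."},
]

def has_multiple_images(messages) -> bool:
    # True as soon as any single message carries more than one image.
    return any(isinstance(m.get("image"), list) and len(m["image"]) > 1
               for m in messages)

assert has_multiple_images(messages) is True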
- return generated_text - - -def check_multiple_image(messages: List[Dict]) -> bool: - """Return True if a single message contains multiple images. Args: - messages: A list[Dict] type object passed to the backend containing 'role', 'content' and 'image'. + messages (List[Dict]): A list of dictionaries passed to the backend, + each containing 'role', 'content', and possibly 'image'. + Returns: - True if a single message contains multiple images; False otherwise. + bool: True if any message contains multiple images, False otherwise. """ has_multiple_images = False for msg in messages: @@ -242,81 +213,92 @@ def check_multiple_image(messages: List[Dict]) -> bool: class HuggingfaceMultimodal(backends.Backend): - """Backend class for multimodal models locally run via HuggingFace transformers.""" def __init__(self): super().__init__() def get_model_for(self, model_spec: backends.ModelSpec) -> backends.Model: - """Get a HuggingfaceMultimodalModel instance with the passed model and settings. - Will load all required data for using the model upon initialization. + """Get the model for the specified model specification. + Args: - model_spec: The ModelSpec for the model. + model_spec (backends.ModelSpec): The model specification. + Returns: - The Model class instance of the model. + backends.Model: The model instance. """ return HuggingfaceMultimodalModel(model_spec) class HuggingfaceMultimodalModel(backends.Model): - """Class for loaded multimodal HuggingFace transformers models ready for generation.""" + def __init__(self, model_spec: backends.ModelSpec): - """ - Args: - model_spec: A ModelSpec instance specifying the model. - """ super().__init__(model_spec) # Load instance variable used for evey model self.device = "cuda" if torch.cuda.is_available() else "cpu" - self.model_type = model_spec['model_type'] - self.model_name = model_spec['model_name'] self.processor = load_processor(model_spec) self.multimodal_model = load_model(model_spec) - self.split_prefix = model_spec['output_split_prefix'] self.context_size = get_context_limit(model_spec) + self.model_name = model_spec['model_name'] - # Type cast model_spec to a Dictionary, for cleaner loading of variables - model_spec_dict = vars(model_spec) - # Load model specific instance variables - self.template = model_spec_dict.get('custom_chat_template', None) - self.cull = model_spec_dict.get('eos_to_cull', None) - self.supports_multiple_images = model_spec_dict.get('supports_multiple_images', False) - self.padding = model_spec_dict.get('padding', False) - self.idefics = 'idefics' in model_spec['model_name'] + self.split_prefix = model_spec.output_split_prefix if hasattr(model_spec, 'output_split_prefix') else "" + self.template = model_spec.custom_chat_template if hasattr(model_spec, 'custom_chat_template') else None + self.premade_template = True if hasattr(model_spec, 'premade_chat_template') else False + self.cull = model_spec.eos_to_cull if hasattr(model_spec, 'eos_to_cull') else None + self.supports_multiple_images = model_spec.supports_multiple_images if hasattr(model_spec, 'supports_multiple_images') else False + self.do_sample = model_spec.do_sample if hasattr(model_spec, 'do_sample') else None + self.prompt_method = model_spec.prompt if hasattr(model_spec, 'prompt') else None + self.response_method = model_spec.response if hasattr(model_spec, 'response') else None def generate_response(self, messages: List[Dict]) -> Tuple[Any, Any, str]: - """Generate a response with the loaded multimodal HuggingFace transformers model. 
+ """Generate a response based on the provided messages. + Args: - messages: A message history. For example: - [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Who won the world series in 2020?"}, - {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, - {"role": "user", "content": "Where was it played?"} - ] + messages (List[Dict]): A list of message dictionaries, each containing 'role', 'content' and possibly 'images'. + Returns: - The response message generated by the loaded multimodal HuggingFace transformers model. + Tuple[Any, Any, str]: A tuple containing: + - dict: The prompt for the model. + - dict: The response from the model. + - str: The processed response text. + + Raises: + AttributeError: If neither 'tokenizer.tokenize' nor 'processor.tokenize' exists. + backends.ContextExceededError: If the context token limit is exceeded. + ValueError: If neither custom chat template or custom prompt method is provided """ # Check to see if game passes multiple images in a single turn # Proceed only if model supports multiple images, else return blanks for prompt, response and response_text has_multiple_images = check_multiple_image(messages=messages) if has_multiple_images and not self.supports_multiple_images: - print(f"Multiple images not supported in a single turn for model {self.model_name}") + logger.warning(f"Multiple images not supported in a single turn for model {self.model_name}") return "", {"response": ""}, "" + prompt_kwargs = { + 'model': self.multimodal_model, + 'processor': self.processor, + 'device': self.device, + } prompt_text = "" # Get input prompt by applying jinja template, if template is provided if self.template: template_str = self.template template = Template(template_str) prompt_text = template.render(messages=messages) + elif self.prompt_method: + prompt_method = import_method(self.prompt_method) + prompt_text = prompt_method(messages, **prompt_kwargs) + else: + raise ValueError("Neither template nor prompt method is provided.") - # Get input prompt if model is of type IdeficsForVisionText2Text - if self.idefics: - _, prompt_text = generate_idefics_input(messages=messages) - # Check context limit - prompt_tokens = self.processor.tokenizer.tokenize(prompt_text) + # Check context limit based on if AutoProcessor is loaded or AutoTokenizer + if hasattr(self.processor, 'tokenize'): + prompt_tokens = self.processor.tokenize(prompt_text) + elif hasattr(self.processor.tokenizer, 'tokenize'): + prompt_tokens = self.processor.tokenizer.tokenize(prompt_text) + else: + raise AttributeError("Neither 'tokenizer.tokenize' nor 'processor.tokenize' exists.") + context_check = check_context_limit(self.context_size, prompt_tokens, max_new_tokens=self.get_max_tokens()) if not context_check[0]: # if context is exceeded, context_check[0] is False logger.info(f"Context token limit for {self.model_spec.model_name} exceeded: " @@ -326,34 +308,33 @@ def generate_response(self, messages: List[Dict]) -> Tuple[Any, Any, str]: tokens_used=context_check[1], tokens_left=context_check[2], context_size=context_check[3]) - # Get a list of images [as input to the Processor] - images = get_images(messages) - - # Generate the output - if self.idefics: - generated_text = generate_idefics_output(messages=messages, - model=self.multimodal_model, - processor=self.processor, - max_tokens=self.get_max_tokens(), - device=self.device) - - else: - if not images: # If no images are present in the history + current 
utterance, use tokenizer to get inputs - inputs = self.processor.tokenizer(prompt_text, return_tensors="pt").to(self.device) - else: - inputs = self.processor(prompt_text, images=images, return_tensors="pt").to(self.device) - model_output = self.multimodal_model.generate(**inputs, max_new_tokens=self.get_max_tokens()) - generated_text = self.processor.batch_decode(model_output, skip_special_tokens=True) - prompt = {"inputs": prompt_text, "max_new_tokens": self.get_max_tokens(), "temperature": self.get_temperature()} + response_method = import_method(self.response_method) + response_kwargs = { + 'model': self.multimodal_model, + 'processor': self.processor, + 'device': self.device, + 'do_sample': self.do_sample, + 'messages': messages, + 'max_tokens': self.get_max_tokens(), + 'model_name': self.model_name + } + generated_response = response_method(**response_kwargs) + + logger.info("*" * 50 + " Generated Response " + "*" * 50) + logger.info(f"\n : {generated_response} \n") # Store generated text - response = {"response": generated_text} + response = {"response": generated_response} - response_text = generated_text[0].split(self.split_prefix)[-1] # Get the last assistant response + # Check if split_prefix is not empty before splitting + response_text = generated_response + if self.split_prefix: + response_text = generated_response.split(self.split_prefix)[-1] # Get the last assistant response if self.cull: rt_split = response_text.split(self.cull) # Cull from End of String token response_text = rt_split[0] response_text = response_text.strip() + return prompt, response, response_text \ No newline at end of file diff --git a/clemcore/backends/model_registry.json b/clemcore/backends/model_registry.json index d33b75b80b..304e6ad0dc 100644 --- a/clemcore/backends/model_registry.json +++ b/clemcore/backends/model_registry.json @@ -1,1549 +1,1814 @@ [ - { - "model_name": "openchat", - "model_id": "openchat-3.5-0106", - "backend": "openai_compatible", - "release_date": "2024-01-06", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "codellama-34b", - "model_id": "codellama-34b-instruct", - "backend": "openai_compatible", - "release_date": "2023-08-24", - "open_weight": true, - "parameters": "34B" - }, - { - "model_name": "Llama-3-70B-Instruct-Anyscale", - "model_id": "meta-llama/Meta-Llama-3-70B-Instruct", - "backend": "openai_compatible", - "release_date": "2024-04-18", - "open_weight": true, - "parameters": "70B" - }, - { - "model_name": "Llama-3-70B-Together.ai", - "model_id": "meta-llama/Llama-3-70b-chat-hf", - "backend": "openai_compatible", - "release_date": "2024-04-18", - "open_weight": true, - "parameters": "70B" - }, - { - "model_name": "Llama-3-8B-Together.ai", - "model_id": "meta-llama/Llama-3-8b-chat-hf", - "backend": "openai_compatible", - "release_date": "2024-04-18", - "open_weight": true, - "parameters": "8B" - }, - { - "model_name": "Llama-3-8B-Instruct-Anyscale", - "model_id": "meta-llama/Meta-Llama-3-8B-Instruct", - "backend": "openai_compatible", - "release_date": "2024-04-18", - "open_weight": true, - "parameters": "8B" - }, - { - "model_name": "Meta-Llama-3.1-405B-Instruct-Turbo", - "model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", - "backend": "openai_compatible", - "release_date": "2024-07-23", - "open_weight": true, - "parameters": "405B" - }, - { - "model_name": "Mixtral-8x22B-Instruct-v0.1", - "model_id": "mistralai/Mixtral-8x22B-Instruct-v0.1", - "backend": "openai_compatible", - "release_date": "2024-04-17", - "open_weight": true, - 
"parameters": "141B" - }, - { - "model_name": "Mixtral-8x7B-Instruct-v0.1", - "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "backend": "openai_compatible", - "release_date": "2023-12-11", - "open_weight": true, - "parameters": "46.7B" - }, - { - "model_name": "fsc-openchat-3.5-0106", - "model_id": "openchat-3.5-0106", - "backend": "openai_compatible", - "release_date": "2024-01-06", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "fsc-codellama-34b-instruct", - "model_id": "codellama-34b-instruct", - "backend": "openai_compatible", - "release_date": "2023-08-24", - "open_weight": true, - "parameters": "34B" - }, - { - "model_name": "gpt-4-1106-vision-preview", - "model_id": "gpt-4-1106-vision-preview", - "backend": "openai", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2023-11-06", - "open_weight": false, - "parameters": "", - "estimated_parameters": "1.76T" - }, - { - "model_name": "gpt-4o-2024-05-13", - "model_id": "gpt-4o-2024-05-13", - "backend": "openai", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-05-13", - "open_weight": false, - "parameters": "", - "estimated_parameters": "200B" - }, - { - "model_name": "gpt-4o-2024-08-06", - "model_id": "gpt-4o-2024-08-06", - "backend": "openai", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-08-06", - "open_weight": false, - "parameters": "", - "estimated_parameters": "200B" - }, - { - "model_name": "gpt-4o-mini-2024-07-18", - "model_id": "gpt-4o-mini-2024-07-18", - "backend": "openai", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-07-18", - "open_weight": false, - "parameters": "", - "estimated_parameters": "8B" - }, - { - "model_name": "gpt-4-turbo-2024-04-09", - "model_id": "gpt-4-turbo-2024-04-09", - "backend": "openai", - "release_date": "2024-04-09", - "open_weight": false, - "parameters": "1.76T" - }, - { - "model_name": "gpt-4-1106-preview", - "model_id": "gpt-4-1106-preview", - "backend": "openai", - "release_date": "2023-11-06", - "open_weight": false, - "parameters": "1.76T" - }, - { - "model_name": "gpt-4-0125-preview", - "model_id": "gpt-4-0125-preview", - "backend": "openai", - "release_date": "2024-01-25", - "open_weight": false, - "parameters": "1.76T" - }, - { - "model_name": "o1-preview-2024-09-12", - "model_id": "o1-preview-2024-09-12", - "backend": "openai", - "release_date": "2024-09-12", - "open_weight": false, - "parameters": "", - "o1_model": true - }, - { - "model_name": "o1-mini-2024-09-12", - "model_id": "o1-mini-2024-09-12", - "backend": "openai", - "release_date": "2024-09-12", - "open_weight": false, - "parameters": "", - "o1_model": true - }, - { - "model_name": "gpt-3.5-turbo-0125", - "model_id": "gpt-3.5-turbo-0125", - "backend": "openai", - "release_date": "2024-01-25", - "open_weight": false, - "parameters": "175B" - }, - { - "model_name": "gpt-4-0613", - "model_id": "gpt-4-0613", - "backend": "openai", - "release_date": "2023-06-13", - "open_weight": false, - "parameters": "1.76T" - }, - { - "model_name": "gpt-4-0314", - "model_id": "gpt-4-0314", - "backend": "openai", - "release_date": "2023-03-14", - "open_weight": false, - "parameters": "1.76T" - }, - { - "model_name": "gpt-3.5-turbo-1106", - "model_id": "gpt-3.5-turbo-1106", - "backend": "openai", - "release_date": "2023-11-06", - "open_weight": false, - "parameters": "175B" - }, - { - "model_name": "gpt-3.5-turbo-0613", - "model_id": "gpt-3.5-turbo-0613", - "backend": 
"openai", - "release_date": "2023-06-13", - "open_weight": false, - "parameters": "175B" - }, - { - "model_name": "mistral-medium-2312", - "model_id": "mistral-medium-2312", - "backend": "mistral", - "release_date": "2023-12-01", - "open_weight": true, - "parameters": "", - "estimated_parameters": "141B" - }, - { - "model_name": "mistral-tiny-2312", - "model_id": "mistral-tiny-2312", - "backend": "mistral", - "release_date": "2023-12-01", - "open_weight": true, - "parameters": "", - "estimated_parameters": "7B" - }, - { - "model_name": "mistral-small-2312", - "model_id": "mistral-small-2312", - "backend": "mistral", - "release_date": "2023-12-01", - "open_weight": true, - "parameters": "", - "estimated_parameters": "46.7B" - }, - { - "model_name": "mistral-large-2402", - "model_id": "mistral-large-2402", - "backend": "mistral", - "release_date": "2024-02-01", - "open_weight": true, - "parameters": "123B" - }, - { - "model_name": "command", - "model_id": "command", - "backend": "cohere", - "release_date": "2022-12-01", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "command-r", - "model_id": "command-r", - "backend": "cohere", - "release_date": "2024-03-01", - "open_weight": true, - "parameters": "35B" - }, - { - "model_name": "command-r-plus", - "model_id": "command-r-plus", - "backend": "cohere", - "release_date": "2024-04-01", - "open_weight": true, - "parameters": "104B" - }, - { - "model_name": "command-light", - "model_id": "command-light", - "backend": "cohere", - "release_date": "2022-12-01", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "claude-v1.3", - "model_id": "claude-v1.3", - "backend": "anthropic", - "release_date": "2023-04-18", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "claude-v1.3-100k", - "model_id": "claude-v1.3-100k", - "backend": "anthropic", - "release_date": "2023-03-18", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "claude-instant-1.2", - "model_id": "claude-instant-1.2", - "backend": "anthropic", - "release_date": "2023-08-09", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "claude-2", - "model_id": "claude-2", - "backend": "anthropic", - "release_date": "2023-07-11", - "open_weight": false, - "parameters": "137B" - }, - { - "model_name": "claude-2.1", - "model_id": "claude-2.1", - "backend": "anthropic", - "release_date": "2023-11-21", - "open_weight": false, - "parameters": "137B" - }, - { - "model_name": "claude-3-opus-20240229", - "model_id": "claude-3-opus-20240229", - "backend": "anthropic", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-02-29", - "open_weight": false, - "parameters": "2T" - }, - { - "model_name": "claude-3-sonnet-20240229", - "model_id": "claude-3-sonnet-20240229", - "backend": "anthropic", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-02-29", - "open_weight": false, - "parameters": "70B" - }, - { - "model_name": "claude-3-haiku-20240307", - "model_id": "claude-3-haiku-20240307", - "backend": "anthropic", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-03-07", - "open_weight": false, - "parameters": "20B" - }, - { - "model_name": "claude-3-5-sonnet-20240620", - "model_id": "claude-3-5-sonnet-20240620", - "backend": "anthropic", - "supports_images": true, - 
"support_multiple_images": true, - "release_date": "2024-06-20", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "gemini-1.0-pro-001", - "model_id": "gemini-1.0-pro-001", - "backend": "google", - "release_date": "2024-02-15", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "gemini-1.0-pro-002", - "model_id": "gemini-1.0-pro-002", - "backend": "google", - "release_date": "2024-04-09", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "gemini-1.0-pro-vision-001", - "model_id": "gemini-1.0-pro-vision-latest", - "backend": "google", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-02-15", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "gemini-1.5-flash-001", - "model_id": "gemini-1.5-flash-001", - "backend": "google", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-05-24", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "gemini-1.5-pro-001", - "model_id": "gemini-1.5-pro-001", - "backend": "google", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-05-24", - "open_weight": false, - "parameters": "1.5T" - }, - { - "model_name": "gemini-1.5-pro-002", - "model_id": "gemini-1.5-pro-002", - "backend": "google", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-09-24", - "open_weight": false, - "parameters": "1.5T" - }, - { - "model_name": "gemini-1.5-flash-002", - "model_id": "gemini-1.5-flash-002", - "backend": "google", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-09-24", - "open_weight": false, - "parameters": "", - "estimated_parameters": "" - }, - { - "model_name": "gemini-1.5-flash-8b-001", - "model_id": "gemini-1.5-flash-8b-001", - "backend": "google", - "supports_images": true, - "support_multiple_images": true, - "release_date": "2024-10-03", - "open_weight": false, - "parameters": "8B" - }, - { - "model_name": "luminous-supreme-control", - "model_id": "luminous-supreme-control", - "backend": "alephalpha", - "release_date": "2023-02-13", - "open_weight": false, - "parameters": "70B" - }, - { - "model_name": "luminous-supreme", - "model_id": "luminous-supreme", - "backend": "alephalpha", - "release_date": "2022-08-15", - "open_weight": false, - "parameters": "70B" - }, - { - "model_name": "luminous-extended", - "model_id": "luminous-extended", - "backend": "alephalpha", - "release_date": "2022-06-15", - "open_weight": false, - "parameters": "30B" - }, - { - "model_name": "luminous-base", - "model_id": "luminous-base", - "backend": "alephalpha", - "release_date": "2022-04-14", - "open_weight": false, - "parameters": "13B" - }, - { - "model_name": "Mistral-7B-Instruct-v0.1", - "backend": "huggingface_local", - "huggingface_id": "mistralai/Mistral-7B-Instruct-v0.1", - "premade_chat_template": true, - "eos_to_cull": "", - "release_date": "2023-09-27", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "sheep-duck-llama-2-70b-v1.1", - "backend": "huggingface_local", - "huggingface_id": "Riiid/sheep-duck-llama-2-70b-v1.1", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ '### 
System:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant:\\n' + message['content'] + '\\n\\n' }}{% endif %}{% if loop.last %}{{ '### Assistant:\\n' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-09-27", - "open_weight": true, - "parameters": "70B" - }, - { - "model_name": "sheep-duck-llama-2-13b", - "backend": "huggingface_local", - "huggingface_id": "Riiid/sheep-duck-llama-2-13b", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ '### System:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant:\\n' + message['content'] + '\\n\\n' }}{% endif %}{% if loop.last %}{{ '### Assistant:\\n' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-10-04", - "open_weight": true, - "parameters": "13B" - }, - { - "model_name": "falcon-7b-instruct", - "backend": "huggingface_local", - "huggingface_id": "tiiuae/falcon-7b-instruct", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "<\\|endoftext\\|>", - "release_date": "2023-04-25", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "falcon-40b-instruct", - "backend": "huggingface_local", - "huggingface_id": "tiiuae/falcon-40b-instruct", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "<\\|endoftext\\|>", - "release_date": "2023-05-25", - "open_weight": true, - "parameters": "40B" - }, - { - "model_name": "oasst-sft-4-pythia-12b-epoch-3.5", - "backend": "huggingface_local", - "huggingface_id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompter|>' + message['content'] + '<|endoftext|>' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>' + message['content'] + '<|endoftext|>' }}{% endif %}{% if loop.last %}{{ '<|assistant|>' }}{% endif %}{% endfor %}", - "eos_to_cull": "<\\|endoftext\\|>", - "release_date": "2023-04-03", - "open_weight": true, - "parameters": "12B" - }, - { - "model_name": "koala-13B-HF", - "backend": "huggingface_local", - "huggingface_id": "TheBloke/koala-13B-HF", - "premade_chat_template": false, - "custom_chat_template": "{{ 'BEGINNING OF CONVERSATION: ' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% elif message['role'] == 'assistant' %}{{ 'GPT: ' + message['content'] + ' ' }}{% endif %}{% if loop.last %}{{ 'GPT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-04-07", - "open_weight": true, - "parameters": "13B" - }, - { - "model_name": "Wizard-Vicuna-13B-Uncensored-HF", - "backend": "huggingface_local", - "huggingface_id": "TheBloke/Wizard-Vicuna-13B-Uncensored-HF", - "premade_chat_template": false, - 
"custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-05-13", - "open_weight": true, - "parameters": "13B" - }, - { - "model_name": "WizardLM-70b-v1.0", - "backend": "huggingface_local", - "huggingface_id": "WizardLM/WizardLM-70b-v1.0", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-08-09", - "open_weight": true, - "parameters": "70B" - }, - { - "model_name": "WizardLM-13b-v1.2", - "backend": "huggingface_local", - "huggingface_id": "WizardLM/WizardLM-13b-v1.2", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-07-25", - "open_weight": true, - "parameters": "13B" - }, - { - "model_name": "vicuna-7b-v1.5", - "backend": "huggingface_local", - "huggingface_id": "lmsys/vicuna-7b-v1.5", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-07-29", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "vicuna-13b-v1.5", - "backend": "huggingface_local", - "huggingface_id": "lmsys/vicuna-13b-v1.5", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-07-29", - "open_weight": true, - "parameters": "13B" - }, - { - "model_name": "vicuna-33b-v1.3", - "backend": "huggingface_local", - "huggingface_id": "lmsys/vicuna-33b-v1.3", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-06-21", - "open_weight": true, - "parameters": "33B" - }, - { - "model_name": "gpt4all-13b-snoozy", - "backend": "huggingface_local", - "huggingface_id": "nomic-ai/gpt4all-13b-snoozy", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% 
if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-04-24", - "open_weight": true, - "parameters": "13B" - }, - { - "model_name": "CodeLlama-34b-Instruct-hf", - "backend": "huggingface_local", - "huggingface_id": "codellama/CodeLlama-34b-Instruct-hf", - "premade_chat_template": true, - "eos_to_cull": "", - "release_date": "2023-08-24", - "open_weight": true, - "parameters": "34B" - }, - { - "model_name": "zephyr-7b-alpha", - "backend": "huggingface_local", - "huggingface_id": "HuggingFaceH4/zephyr-7b-alpha", - "premade_chat_template": true, - "eos_to_cull": "", - "release_date": "2023-10-09", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "zephyr-7b-beta", - "backend": "huggingface_local", - "huggingface_id": "HuggingFaceH4/zephyr-7b-beta", - "premade_chat_template": true, - "eos_to_cull": "", - "release_date": "2023-10-26", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "openchat_3.5", - "backend": "huggingface_local", - "huggingface_id": "openchat/openchat_3.5", - "premade_chat_template": true, - "eos_to_cull": "<\\|end_of_turn\\|>", - "release_date": "2023-10-30", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "Yi-34B-Chat", - "backend": "huggingface_local", - "huggingface_id": "01-ai/Yi-34B-Chat", - "premade_chat_template": true, - "slow_tokenizer": true, - "output_split_prefix": "assistant\n", - "eos_to_cull": "<\\|im_end\\|>", - "release_date": "2023-11-22", - "open_weight": true, - "parameters": "34B" - }, - { - "model_name": "deepseek-llm-7b-chat", - "backend": "huggingface_local", - "huggingface_id": "deepseek-ai/deepseek-llm-7b-chat", - "premade_chat_template": true, - "eos_to_cull": "<\uff5cend\u2581of\u2581sentence\uff5c>", - "release_date": "2023-11-29", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "deepseek-llm-67b-chat", - "backend": "huggingface_local", - "huggingface_id": "deepseek-ai/deepseek-llm-67b-chat", - "premade_chat_template": true, - "eos_to_cull": "<\uff5cend\u2581of\u2581sentence\uff5c>", - "release_date": "2023-11-29", - "open_weight": true, - "parameters": "67B" - }, - { - "model_name": "tulu-2-dpo-7b", - "backend": "huggingface_local", - "huggingface_id": "allenai/tulu-2-dpo-7b", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-11-13", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "tulu-2-dpo-70b", - "backend": "huggingface_local", - "huggingface_id": "allenai/tulu-2-dpo-70b", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", - "eos_to_cull": "", - "release_date": "2023-11-12", - "open_weight": true, - "parameters": "70B" - }, - { - "model_name": "Mixtral-8x7B-Instruct-v0.1", - "backend": "huggingface_local", - "huggingface_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", - "premade_chat_template": true, - "eos_to_cull": "", - "release_date": "2023-12-11", - 
"open_weight": true, - "parameters": "46.7B" - }, - { - "model_name": "SUS-Chat-34B", - "backend": "huggingface_local", - "huggingface_id": "SUSTech/SUS-Chat-34B", - "premade_chat_template": false, - "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant: ' + message['content'] }}{% endif %}{% if loop.last %}{{ '### Assistant: ' }}{% endif %}{% endfor %}", - "slow_tokenizer": true, - "eos_to_cull": "<\\|endoftext\\|>", - "release_date": "2023-11-29", - "open_weight": true, - "parameters": "34B" - }, - { - "model_name": "CodeLlama-70b-Instruct-hf", - "backend": "huggingface_local", - "huggingface_id": "codellama/CodeLlama-70b-Instruct-hf", - "premade_chat_template": true, - "eos_to_cull": "", - "release_date": "2024-01-29", - "open_weight": true, - "parameters": "70B" - }, - { - "model_name": "openchat-3.5-0106", - "backend": "huggingface_local", - "huggingface_id": "openchat/openchat-3.5-0106", - "premade_chat_template": true, - "eos_to_cull": "<\\|end_of_turn\\|>", - "release_date": "2024-01-06", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "openchat-3.5-1210", - "backend": "huggingface_local", - "huggingface_id": "openchat/openchat-3.5-1210", - "premade_chat_template": true, - "eos_to_cull": "<\\|end_of_turn\\|>", - "release_date": "2023-12-10", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "Nous-Hermes-2-Mixtral-8x7B-DPO", - "backend": "huggingface_local", - "huggingface_id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "premade_chat_template": true, - "eos_to_cull": "<\\|im_end\\|>", - "release_date": "2024-01-11", - "open_weight": true, - "parameters": "46.7B" - }, - { - "model_name": "Smaug-72B-v0.1", - "backend": "huggingface_local", - "huggingface_id": "abacusai/Smaug-72B-v0.1", - "premade_chat_template": false, - "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\n' + system_message + '\n<>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}", - "eos_to_cull": "<\\|endoftext\\|>", - "release_date": "2024-02-02", - "open_weight": true, - "parameters": "72B" - }, - { - "model_name": "Smaug-34B-v0.1", - "backend": "huggingface_local", - "huggingface_id": "abacusai/Smaug-34B-v0.1", - "premade_chat_template": false, - "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set 
content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
-    "eos_to_cull": "<\\|endoftext\\|>",
-    "release_date": "2024-01-25",
-    "open_weight": true,
-    "parameters": "34B"
-  },
-  {
-    "model_name": "Qwen1.5-7B-Chat",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen1.5-7B-Chat",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-01-30",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "Qwen1.5-72B-Chat",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen1.5-72B-Chat",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-01-30",
-    "open_weight": true,
-    "parameters": "72B"
-  },
-  {
-    "model_name": "Swallow-70b-instruct-v0.1",
-    "backend": "huggingface_local",
-    "huggingface_id": "tokyotech-llm/Swallow-70b-instruct-v0.1",
-    "premade_chat_template": true,
-    "eos_to_cull": "</s>",
-    "release_date": "2023-12-19",
-    "open_weight": true,
-    "parameters": "70B"
-  },
-  {
-    "model_name": "Phi-3-mini-128k-instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "microsoft/Phi-3-mini-128k-instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|endoftext\\|>",
-    "release_date": "2024-04-22",
-    "open_weight": true,
-    "parameters": "3.8B"
-  },
-  {
-    "model_name": "Starling-LM-7B-beta",
-    "backend": "huggingface_local",
-    "huggingface_id": "Nexusflow/Starling-LM-7B-beta",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|end_of_turn\\|>",
-    "release_date": "2024-03-19",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "Qwen2-7B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen2-7B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-06-04",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "Qwen2-72B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen2-72B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-05-28",
-    "open_weight": true,
-    "parameters": "72B"
-  },
-  {
-    "model_name": "Llama-3-SauerkrautLM-70b-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-04-24",
-    "open_weight": true,
-    "parameters": "70B"
-  },
-  {
-    "model_name": "aya-23-8B",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "CohereForAI/aya-23-8B",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
-    "release_date": "2024-05-19",
-    "open_weight": true,
-    "parameters": "8B"
-  },
-  {
-    "model_name": "aya-23-35B",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "CohereForAI/aya-23-35B",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
-    "release_date": "2024-05-19",
-    "open_weight": true,
-    "parameters": "35B"
-  },
-  {
-    "model_name": "gemma-2-9b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/gemma-2-9b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>\n*",
-    "release_date": "2024-06-24",
-    "open_weight": true,
-    "parameters": "9B"
-  },
-  {
-    "model_name": "gemma-2-27b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/gemma-2-27b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>\n*",
-    "release_date": "2024-06-24",
-    "open_weight": true,
-    "parameters": "27B"
-  },
-  {
-    "model_name": "llama-2-7b-chat-hf",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "meta-llama/llama-2-7b-chat-hf",
-    "premade_chat_template": true,
-    "eos_to_cull": "</s>",
-    "release_date": "2023-07-18",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "llama-2-13b-chat-hf",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "meta-llama/llama-2-13b-chat-hf",
-    "premade_chat_template": true,
-    "eos_to_cull": "</s>",
-    "release_date": "2023-07-18",
-    "open_weight": true,
-    "parameters": "13B"
-  },
-  {
-    "model_name": "llama-2-70b-chat-hf",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "meta-llama/llama-2-70b-chat-hf",
-    "premade_chat_template": true,
-    "eos_to_cull": "</s>",
-    "release_date": "2023-07-18",
-    "open_weight": true,
-    "parameters": "70B"
-  },
-  {
-    "model_name": "gemma-7b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/gemma-7b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>",
-    "release_date": "2024-02-21",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "gemma-1.1-2b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/gemma-1.1-2b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>",
-    "release_date": "2024-03-26",
-    "open_weight": true,
-    "parameters": "2B"
-  },
-  {
-    "model_name": "gemma-1.1-7b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/gemma-1.1-7b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>",
-    "release_date": "2024-03-26",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "codegemma-7b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/codegemma-7b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>",
-    "release_date": "2024-04-09",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "recurrentgemma-2b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/recurrentgemma-2b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>",
-    "release_date": "2024-04-09",
-    "open_weight": true,
-    "parameters": "2B"
-  },
-  {
-    "model_name": "gemma-2-2b-it",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "google/gemma-2-2b-it",
-    "premade_chat_template": true,
-    "eos_to_cull": "<end_of_turn>",
-    "release_date": "2024-07-16",
-    "open_weight": true,
-    "parameters": "2B"
-  },
-  {
-    "model_name": "Meta-Llama-3.1-8B-Instruct",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-07-23",
-    "open_weight": true,
-    "parameters": "8B"
-  },
-  {
-    "model_name": "Meta-Llama-3.1-70B-Instruct",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-07-23",
-    "open_weight": true,
-    "parameters": "70B"
-  },
-  {
-    "model_name": "Mistral-Large-Instruct-2407",
-    "backend": "huggingface_local",
-    "requires_api_key": true,
-    "huggingface_id": "mistralai/Mistral-Large-Instruct-2407",
-    "premade_chat_template": true,
-    "eos_to_cull": "</s>",
-    "release_date": "2024-07-24",
-    "open_weight": true,
-    "parameters": "123B"
-  },
-  {
-    "model_name": "Qwen2.5-7B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen2.5-7B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-07-24",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "Qwen2.5-14B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen2.5-14B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-07-24",
-    "open_weight": true,
-    "parameters": "14B"
-  },
-  {
-    "model_name": "Qwen2.5-32B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen2.5-32B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-07-24",
-    "open_weight": true,
-    "parameters": "32B"
-  },
-  {
-    "model_name": "Qwen2.5-72B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "Qwen/Qwen2.5-72B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-07-24",
-    "open_weight": true,
-    "parameters": "72B"
-  },
-  {
-    "model_name": "Llama-3.2-1B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "meta-llama/Llama-3.2-1B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-09-18",
-    "open_weight": false,
-    "parameters": "1B"
-  },
-  {
-    "model_name": "Llama-3.2-3B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "meta-llama/Llama-3.2-3B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-09-18",
-    "open_weight": false,
-    "parameters": "3B"
-  },
-  {
-    "model_name": "EuroLLM-1.7B-Instruct",
-    "backend": "huggingface_local",
-    "huggingface_id": "utter-project/EuroLLM-1.7B-Instruct",
-    "premade_chat_template": true,
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-09-24",
-    "open_weight": true,
-    "parameters": "1.7B"
-  },
-  {
-    "model_name": "Qwen1.5-0.5B-Chat-GGUF-q8",
-    "backend": "llamacpp",
-    "huggingface_id": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
-    "filename": "*q8_0.gguf",
-    "premade_chat_template": true,
-    "bos_string": "",
-    "eos_string": "<|im_end|>",
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-02-03",
-    "open_weight": true,
-    "parameters": "0.5B"
-  },
-  {
-    "model_name": "CapybaraHermes-2.5-Mistral-7B-GGUF-q4",
-    "backend": "llamacpp",
-    "huggingface_id": "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
-    "filename": "*Q4_0.gguf",
-    "premade_chat_template": true,
-    "bos_string": "<s>",
-    "eos_string": "<|im_end|>",
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-01-31",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "CapybaraHermes-2.5-Mistral-7B-GGUF-q5",
-    "backend": "llamacpp",
-    "huggingface_id": "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
-    "filename": "*Q5_0.gguf",
-    "premade_chat_template": true,
-    "bos_string": "<s>",
-    "eos_string": "<|im_end|>",
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-01-31",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "CapybaraHermes-2.5-Mistral-7B-GGUF-q5-k-s",
-    "backend": "llamacpp",
-    "huggingface_id": "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
-    "filename": "*Q5_K_S.gguf",
-    "premade_chat_template": true,
-    "bos_string": "<s>",
-    "eos_string": "<|im_end|>",
-    "eos_to_cull": "<\\|im_end\\|>",
-    "release_date": "2024-01-31",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "EstopianMaid-13B-GGUF-q2-k",
-    "backend": "llamacpp",
-    "huggingface_id": "TheBloke/EstopianMaid-13B-GGUF",
-    "filename": "*Q2_K.gguf",
-    "premade_chat_template": false,
-    "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '\\n\\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{% if system_message %}{{ bos_token + system_message }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{bos_token + '### Instruction:\\n' + message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'].strip() + eos_token + '\\n\\n' }}{% endif %}{% if loop.last and message['role'] == 'user' and add_generation_prompt %}{{ '### Response:\\n' }}{% endif %}{% endfor %}",
-    "bos_string": "<s>",
-    "eos_string": "</s>",
-    "eos_to_cull": "</s>",
-    "release_date": "2024-01-26",
-    "open_weight": true,
-    "parameters": "13B"
-  },
-  {
-    "model_name": "EstopianMaid-13B-GGUF-q3-k-s",
-    "backend": "llamacpp",
-    "huggingface_id": "TheBloke/EstopianMaid-13B-GGUF",
-    "filename": "*Q3_K_S.gguf",
-    "premade_chat_template": false,
-    "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '\\n\\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{% if system_message %}{{ bos_token + system_message }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{bos_token + '### Instruction:\\n' + message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'].strip() + eos_token + '\\n\\n' }}{% endif %}{% if loop.last and message['role'] == 'user' and add_generation_prompt %}{{ '### Response:\\n' }}{% endif %}{% endfor %}",
-    "bos_string": "<s>",
-    "eos_string": "</s>",
-    "eos_to_cull": "</s>",
-    "release_date": "2024-01-26",
-    "open_weight": true,
-    "parameters": "13B"
-  },
-  {
-    "model_name": "openchat_3.5-GGUF-q5",
-    "backend": "llamacpp",
-    "huggingface_id": "TheBloke/openchat_3.5-GGUF",
-    "filename": "*Q5_0.gguf",
-    "premade_chat_template": false,
-    "custom_chat_template": "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
-    "bos_string": "<s>",
-    "eos_string": "<|end_of_turn|>",
-    "eos_to_cull": "<\\|end_of_turn\\|>",
-    "release_date": "2023-11-02",
-    "open_weight": true,
-    "parameters": "7B"
-  },
-  {
-    "model_name": "Meta-Llama-3-70B-Instruct-GGUF-q4",
-    "backend": "llamacpp",
-    "huggingface_id": "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF",
-    "filename": "*Q4_K_M.gguf",
-    "premade_chat_template": true,
-    "bos_string": "<|begin_of_text|>",
-    "eos_string": "<|eot_id|>",
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-04-18",
-    "open_weight": true,
-    "parameters": "70B"
-  },
-  {
-    "model_name": "Meta-Llama-3-70B-Instruct-GGUF-q8",
-    "backend": "llamacpp",
-    "huggingface_id": "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF",
-    "filename": "*Q8_0-00001-of-00002.gguf",
-    "additional_files": [
-      "*Q8_0-00002-of-00002.gguf"
-    ],
-    "premade_chat_template": true,
-    "bos_string": "<|begin_of_text|>",
-    "eos_string": "<|eot_id|>",
-    "eos_to_cull": "<\\|eot_id\\|>",
-    "release_date": "2024-04-18",
-    "open_weight": true,
-    "parameters": "70B"
-  },
-  {
-    "model_name": "c4ai-command-r-plus-GGUF-q4",
-    "backend": "llamacpp",
-    "huggingface_id": "pmysl/c4ai-command-r-plus-GGUF",
-    "filename": "*Q4_K_M-00001-of-00002.gguf",
-    "additional_files": [
-      "*Q4_K_M-00002-of-00002.gguf"
-    ],
-    "premade_chat_template": true,
-    "bos_string": "<BOS_TOKEN>",
-    "eos_string": "<|END_OF_TURN_TOKEN|>",
-    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
-    "release_date": "2024-04-04",
-    "open_weight": true,
-    "parameters": "104B"
-  },
-  {
-    "model_name": "c4ai-command-r-plus-GGUF-q8",
-    "backend": "llamacpp",
-    "huggingface_id": "pmysl/c4ai-command-r-plus-GGUF",
-    "filename": "*Q8_0-00001-of-00003.gguf",
-    "additional_files": [
-      "*Q8_0-00002-of-00003.gguf",
-      "*Q8_0-00003-of-00003.gguf"
-    ],
-    "premade_chat_template": true,
-    "bos_string": "<BOS_TOKEN>",
-    "eos_string": "<|END_OF_TURN_TOKEN|>",
-    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
-    "release_date": "2024-04-04",
-    "open_weight": true,
-    "parameters": "104B"
-  },
-  {
-    "model_name": "llava-1.5-7b-hf",
+  {
+    "model_name": "openchat",
+    "model_id": "openchat-3.5-0106",
+    "backend": "openai_compatible",
+    "release_date": "2024-01-06",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "codellama-34b",
+    "model_id": "codellama-34b-instruct",
+    "backend": "openai_compatible",
+    "release_date": "2023-08-24",
+    "open_weight": true,
+    "parameters": "34B"
+  },
+  {
+    "model_name": "Llama-3-70B-Instruct-Anyscale",
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "backend": "openai_compatible",
+    "release_date": "2024-04-18",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "Llama-3-70B-Together.ai",
+    "model_id": "meta-llama/Llama-3-70b-chat-hf",
+    "backend": "openai_compatible",
+    "release_date": "2024-04-18",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "Llama-3-8B-Together.ai",
+    "model_id": "meta-llama/Llama-3-8b-chat-hf",
+    "backend": "openai_compatible",
+    "release_date": "2024-04-18",
+    "open_weight": true,
+    "parameters": "8B"
+  },
+  {
+    "model_name": "Llama-3-8B-Instruct-Anyscale",
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "backend": "openai_compatible",
+    "release_date": "2024-04-18",
+    "open_weight": true,
+    "parameters": "8B"
+  },
+  {
+    "model_name": "Meta-Llama-3.1-405B-Instruct-Turbo",
+    "model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+    "backend": "openai_compatible",
+    "release_date": "2024-07-23",
+    "open_weight": true,
+    "parameters": "405B"
+  },
+  {
+    "model_name": "Mixtral-8x22B-Instruct-v0.1",
+    "model_id": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "backend": "openai_compatible",
+    "release_date": "2024-04-17",
+    "open_weight": true,
+    "parameters": "141B"
+  },
+  {
+    "model_name": "Mixtral-8x7B-Instruct-v0.1",
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "backend": "openai_compatible",
+    "release_date": "2023-12-11",
+    "open_weight": true,
+    "parameters": "46.7B"
+  },
+  {
+    "model_name": "fsc-openchat-3.5-0106",
+    "model_id": "openchat-3.5-0106",
+    "backend": "openai_compatible",
+    "release_date": "2024-01-06",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "fsc-codellama-34b-instruct",
+    "model_id": "codellama-34b-instruct",
+    "backend": "openai_compatible",
+    "release_date": "2023-08-24",
+    "open_weight": true,
+    "parameters": "34B"
+  },
+  {
+    "model_name": "gpt-4-1106-vision-preview",
+    "model_id": "gpt-4-1106-vision-preview",
+    "backend": "openai",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2023-11-06",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": "1.76T"
+  },
+  {
+    "model_name": "gpt-4o-2024-05-13",
+    "model_id": "gpt-4o-2024-05-13",
+    "backend": "openai",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-05-13",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": "200B"
+  },
+  {
+    "model_name": "gpt-4o-2024-08-06",
+    "model_id": "gpt-4o-2024-08-06",
+    "backend": "openai",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-08-06",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": "200B"
+  },
+  {
+    "model_name": "gpt-4o-mini-2024-07-18",
+    "model_id": "gpt-4o-mini-2024-07-18",
+    "backend": "openai",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-07-18",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": "8B"
+  },
+  {
+    "model_name": "gpt-4-turbo-2024-04-09",
+    "model_id": "gpt-4-turbo-2024-04-09",
+    "backend": "openai",
+    "release_date": "2024-04-09",
+    "open_weight": false,
+    "parameters": "1.76T"
+  },
+  {
+    "model_name": "gpt-4-1106-preview",
+    "model_id": "gpt-4-1106-preview",
+    "backend": "openai",
+    "release_date": "2023-11-06",
+    "open_weight": false,
+    "parameters": "1.76T"
+  },
+  {
+    "model_name": "gpt-4-0125-preview",
+    "model_id": "gpt-4-0125-preview",
+    "backend": "openai",
+    "release_date": "2024-01-25",
+    "open_weight": false,
+    "parameters": "1.76T"
+  },
+  {
+    "model_name": "o1-preview-2024-09-12",
+    "model_id": "o1-preview-2024-09-12",
+    "backend": "openai",
+    "release_date": "2024-09-12",
+    "open_weight": false,
+    "parameters": "",
+    "o1_model": true
+  },
+  {
+    "model_name": "o1-mini-2024-09-12",
+    "model_id": "o1-mini-2024-09-12",
+    "backend": "openai",
+    "release_date": "2024-09-12",
+    "open_weight": false,
+    "parameters": "",
+    "o1_model": true
+  },
+  {
+    "model_name": "gpt-3.5-turbo-0125",
+    "model_id": "gpt-3.5-turbo-0125",
+    "backend": "openai",
+    "release_date": "2024-01-25",
+    "open_weight": false,
+    "parameters": "175B"
+  },
+  {
+    "model_name": "gpt-4-0613",
+    "model_id": "gpt-4-0613",
+    "backend": "openai",
+    "release_date": "2023-06-13",
+    "open_weight": false,
+    "parameters": "1.76T"
+  },
+  {
+    "model_name": "gpt-4-0314",
+    "model_id": "gpt-4-0314",
+    "backend": "openai",
+    "release_date": "2023-03-14",
+    "open_weight": false,
+    "parameters": "1.76T"
+  },
+  {
+    "model_name": "gpt-3.5-turbo-1106",
+    "model_id": "gpt-3.5-turbo-1106",
+    "backend": "openai",
+    "release_date": "2023-11-06",
+    "open_weight": false,
+    "parameters": "175B"
+  },
+  {
+    "model_name": "gpt-3.5-turbo-0613",
+    "model_id": "gpt-3.5-turbo-0613",
+    "backend": "openai",
+    "release_date": "2023-06-13",
+    "open_weight": false,
+    "parameters": "175B"
+  },
+  {
+    "model_name": "mistral-medium-2312",
+    "model_id": "mistral-medium-2312",
+    "backend": "mistral",
+    "release_date": "2023-12-01",
+    "open_weight": true,
+    "parameters": "",
+    "estimated_parameters": "141B"
+  },
+  {
+    "model_name": "mistral-tiny-2312",
+    "model_id": "mistral-tiny-2312",
+    "backend": "mistral",
+    "release_date": "2023-12-01",
+    "open_weight": true,
+    "parameters": "",
+    "estimated_parameters": "7B"
+  },
+  {
+    "model_name": "mistral-small-2312",
+    "model_id": "mistral-small-2312",
+    "backend": "mistral",
+    "release_date": "2023-12-01",
+    "open_weight": true,
+    "parameters": "",
+    "estimated_parameters": "46.7B"
+  },
+  {
+    "model_name": "mistral-large-2402",
+    "model_id": "mistral-large-2402",
+    "backend": "mistral",
+    "release_date": "2024-02-01",
+    "open_weight": true,
+    "parameters": "123B"
+  },
+  {
+    "model_name": "command",
+    "model_id": "command",
+    "backend": "cohere",
+    "release_date": "2022-12-01",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "command-r",
+    "model_id": "command-r",
+    "backend": "cohere",
+    "release_date": "2024-03-01",
+    "open_weight": true,
+    "parameters": "35B"
+  },
+  {
+    "model_name": "command-r-plus",
+    "model_id": "command-r-plus",
+    "backend": "cohere",
+    "release_date": "2024-04-01",
+    "open_weight": true,
+    "parameters": "104B"
+  },
+  {
+    "model_name": "command-light",
+    "model_id": "command-light",
+    "backend": "cohere",
+    "release_date": "2022-12-01",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "claude-v1.3",
+    "model_id": "claude-v1.3",
+    "backend": "anthropic",
+    "release_date": "2023-04-18",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "claude-v1.3-100k",
+    "model_id": "claude-v1.3-100k",
+    "backend": "anthropic",
+    "release_date": "2023-03-18",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "claude-instant-1.2",
+    "model_id": "claude-instant-1.2",
+    "backend": "anthropic",
+    "release_date": "2023-08-09",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "claude-2",
+    "model_id": "claude-2",
+    "backend": "anthropic",
+    "release_date": "2023-07-11",
+    "open_weight": false,
+    "parameters": "137B"
+  },
+  {
+    "model_name": "claude-2.1",
+    "model_id": "claude-2.1",
+    "backend": "anthropic",
+    "release_date": "2023-11-21",
+    "open_weight": false,
+    "parameters": "137B"
+  },
+  {
+    "model_name": "claude-3-opus-20240229",
+    "model_id": "claude-3-opus-20240229",
+    "backend": "anthropic",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-02-29",
+    "open_weight": false,
+    "parameters": "2T"
+  },
+  {
+    "model_name": "claude-3-sonnet-20240229",
+    "model_id": "claude-3-sonnet-20240229",
+    "backend": "anthropic",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-02-29",
+    "open_weight": false,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "claude-3-haiku-20240307",
+    "model_id": "claude-3-haiku-20240307",
+    "backend": "anthropic",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-03-07",
+    "open_weight": false,
+    "parameters": "20B"
+  },
+  {
+    "model_name": "claude-3-5-sonnet-20240620",
+    "model_id": "claude-3-5-sonnet-20240620",
+    "backend": "anthropic",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-06-20",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "gemini-1.0-pro-001",
+    "model_id": "gemini-1.0-pro-001",
+    "backend": "google",
+    "release_date": "2024-02-15",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "gemini-1.0-pro-002",
+    "model_id": "gemini-1.0-pro-002",
+    "backend": "google",
+    "release_date": "2024-04-09",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "gemini-1.0-pro-vision-001",
+    "model_id": "gemini-1.0-pro-vision-latest",
+    "backend": "google",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-02-15",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "gemini-1.5-flash-001",
+    "model_id": "gemini-1.5-flash-001",
+    "backend": "google",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-05-24",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "gemini-1.5-pro-001",
+    "model_id": "gemini-1.5-pro-001",
+    "backend": "google",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-05-24",
+    "open_weight": false,
+    "parameters": "1.5T"
+  },
+  {
+    "model_name": "gemini-1.5-pro-002",
+    "model_id": "gemini-1.5-pro-002",
+    "backend": "google",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-09-24",
+    "open_weight": false,
+    "parameters": "1.5T"
+  },
+  {
+    "model_name": "gemini-1.5-flash-002",
+    "model_id": "gemini-1.5-flash-002",
+    "backend": "google",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-09-24",
+    "open_weight": false,
+    "parameters": "",
+    "estimated_parameters": ""
+  },
+  {
+    "model_name": "gemini-1.5-flash-8b-001",
+    "model_id": "gemini-1.5-flash-8b-001",
+    "backend": "google",
+    "supports_images": true,
+    "support_multiple_images": true,
+    "release_date": "2024-10-03",
+    "open_weight": false,
+    "parameters": "8B"
+  },
+  {
+    "model_name": "luminous-supreme-control",
+    "model_id": "luminous-supreme-control",
+    "backend": "alephalpha",
+    "release_date": "2023-02-13",
+    "open_weight": false,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "luminous-supreme",
+    "model_id": "luminous-supreme",
+    "backend": "alephalpha",
+    "release_date": "2022-08-15",
+    "open_weight": false,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "luminous-extended",
+    "model_id": "luminous-extended",
+    "backend": "alephalpha",
+    "release_date": "2022-06-15",
+    "open_weight": false,
+    "parameters": "30B"
+  },
+  {
+    "model_name": "luminous-base",
+    "model_id": "luminous-base",
+    "backend": "alephalpha",
+    "release_date": "2022-04-14",
+    "open_weight": false,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "Mistral-7B-Instruct-v0.1",
+    "backend": "huggingface_local",
+    "huggingface_id": "mistralai/Mistral-7B-Instruct-v0.1",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-09-27",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "sheep-duck-llama-2-70b-v1.1",
+    "backend": "huggingface_local",
+    "huggingface_id": "Riiid/sheep-duck-llama-2-70b-v1.1",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ '### System:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant:\\n' + message['content'] + '\\n\\n' }}{% endif %}{% if loop.last %}{{ '### Assistant:\\n' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-09-27",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "sheep-duck-llama-2-13b",
+    "backend": "huggingface_local",
+    "huggingface_id": "Riiid/sheep-duck-llama-2-13b",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'system' %}{{ '### System:\\n' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant:\\n' + message['content'] + '\\n\\n' }}{% endif %}{% if loop.last %}{{ '### Assistant:\\n' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-10-04",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "falcon-7b-instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "tiiuae/falcon-7b-instruct",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2023-04-25",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "falcon-40b-instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "tiiuae/falcon-40b-instruct",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2023-05-25",
+    "open_weight": true,
+    "parameters": "40B"
+  },
+  {
+    "model_name": "oasst-sft-4-pythia-12b-epoch-3.5",
+    "backend": "huggingface_local",
+    "huggingface_id": "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompter|>' + message['content'] + '<|endoftext|>' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>' + message['content'] + '<|endoftext|>' }}{% endif %}{% if loop.last %}{{ '<|assistant|>' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2023-04-03",
+    "open_weight": true,
+    "parameters": "12B"
+  },
+  {
+    "model_name": "koala-13B-HF",
+    "backend": "huggingface_local",
+    "huggingface_id": "TheBloke/koala-13B-HF",
+    "premade_chat_template": false,
+    "custom_chat_template": "{{ 'BEGINNING OF CONVERSATION: ' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% elif message['role'] == 'assistant' %}{{ 'GPT: ' + message['content'] + ' ' }}{% endif %}{% if loop.last %}{{ 'GPT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-04-07",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "Wizard-Vicuna-13B-Uncensored-HF",
+    "backend": "huggingface_local",
+    "huggingface_id": "TheBloke/Wizard-Vicuna-13B-Uncensored-HF",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-05-13",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "WizardLM-70b-v1.0",
+    "backend": "huggingface_local",
+    "huggingface_id": "WizardLM/WizardLM-70b-v1.0",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-08-09",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "WizardLM-13b-v1.2",
+    "backend": "huggingface_local",
+    "huggingface_id": "WizardLM/WizardLM-13b-v1.2",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-07-25",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "vicuna-7b-v1.5",
+    "backend": "huggingface_local",
+    "huggingface_id": "lmsys/vicuna-7b-v1.5",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-07-29",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "vicuna-13b-v1.5",
+    "backend": "huggingface_local",
+    "huggingface_id": "lmsys/vicuna-13b-v1.5",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-07-29",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "vicuna-33b-v1.3",
+    "backend": "huggingface_local",
+    "huggingface_id": "lmsys/vicuna-33b-v1.3",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-06-21",
+    "open_weight": true,
+    "parameters": "33B"
+  },
+  {
+    "model_name": "gpt4all-13b-snoozy",
+    "backend": "huggingface_local",
+    "huggingface_id": "nomic-ai/gpt4all-13b-snoozy",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + '\\n' }}{% elif message['role'] == 'assistant' %}{{ 'ASSISTANT: ' + message['content'] + '\\n' }}{% endif %}{% if loop.last %}{{ 'ASSISTANT:' }}{% endif %}{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-04-24",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "CodeLlama-34b-Instruct-hf",
+    "backend": "huggingface_local",
+    "huggingface_id": "codellama/CodeLlama-34b-Instruct-hf",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-08-24",
+    "open_weight": true,
+    "parameters": "34B"
+  },
+  {
+    "model_name": "zephyr-7b-alpha",
+    "backend": "huggingface_local",
+    "huggingface_id": "HuggingFaceH4/zephyr-7b-alpha",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-10-09",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "zephyr-7b-beta",
+    "backend": "huggingface_local",
+    "huggingface_id": "HuggingFaceH4/zephyr-7b-beta",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-10-26",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "openchat_3.5",
+    "backend": "huggingface_local",
+    "huggingface_id": "openchat/openchat_3.5",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|end_of_turn\\|>",
+    "release_date": "2023-10-30",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Yi-34B-Chat",
+    "backend": "huggingface_local",
+    "huggingface_id": "01-ai/Yi-34B-Chat",
+    "premade_chat_template": true,
+    "slow_tokenizer": true,
+    "output_split_prefix": "assistant\n",
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2023-11-22",
+    "open_weight": true,
+    "parameters": "34B"
+  },
+  {
+    "model_name": "deepseek-llm-7b-chat",
+    "backend": "huggingface_local",
+    "huggingface_id": "deepseek-ai/deepseek-llm-7b-chat",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\uff5cend\u2581of\u2581sentence\uff5c>",
+    "release_date": "2023-11-29",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "deepseek-llm-67b-chat",
+    "backend": "huggingface_local",
+    "huggingface_id": "deepseek-ai/deepseek-llm-67b-chat",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\uff5cend\u2581of\u2581sentence\uff5c>",
+    "release_date": "2023-11-29",
+    "open_weight": true,
+    "parameters": "67B"
+  },
+  {
+    "model_name": "tulu-2-dpo-7b",
+    "backend": "huggingface_local",
+    "huggingface_id": "allenai/tulu-2-dpo-7b",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-11-13",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "tulu-2-dpo-70b",
+    "backend": "huggingface_local",
+    "huggingface_id": "allenai/tulu-2-dpo-70b",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+    "eos_to_cull": "</s>",
+    "release_date": "2023-11-12",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "Mixtral-8x7B-Instruct-v0.1",
+    "backend": "huggingface_local",
+    "huggingface_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-12-11",
+    "open_weight": true,
+    "parameters": "46.7B"
+  },
+  {
+    "model_name": "SUS-Chat-34B",
+    "backend": "huggingface_local",
+    "huggingface_id": "SUSTech/SUS-Chat-34B",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Human: ' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant: ' + message['content'] }}{% endif %}{% if loop.last %}{{ '### Assistant: ' }}{% endif %}{% endfor %}",
+    "slow_tokenizer": true,
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2023-11-29",
+    "open_weight": true,
+    "parameters": "34B"
+  },
+  {
+    "model_name": "CodeLlama-70b-Instruct-hf",
+    "backend": "huggingface_local",
+    "huggingface_id": "codellama/CodeLlama-70b-Instruct-hf",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2024-01-29",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "openchat-3.5-0106",
+    "backend": "huggingface_local",
+    "huggingface_id": "openchat/openchat-3.5-0106",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|end_of_turn\\|>",
+    "release_date": "2024-01-06",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "openchat-3.5-1210",
+    "backend": "huggingface_local",
+    "huggingface_id": "openchat/openchat-3.5-1210",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|end_of_turn\\|>",
+    "release_date": "2023-12-10",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "backend": "huggingface_local",
+    "huggingface_id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-01-11",
+    "open_weight": true,
+    "parameters": "46.7B"
+  },
+  {
+    "model_name": "Smaug-72B-v0.1",
+    "backend": "huggingface_local",
+    "huggingface_id": "abacusai/Smaug-72B-v0.1",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2024-02-02",
+    "open_weight": true,
+    "parameters": "72B"
+  },
+  {
+    "model_name": "Smaug-34B-v0.1",
+    "backend": "huggingface_local",
+    "huggingface_id": "abacusai/Smaug-34B-v0.1",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2024-01-25",
+    "open_weight": true,
+    "parameters": "34B"
+  },
+  {
+    "model_name": "Qwen1.5-7B-Chat",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen1.5-7B-Chat",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-01-30",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Qwen1.5-72B-Chat",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen1.5-72B-Chat",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-01-30",
+    "open_weight": true,
+    "parameters": "72B"
+  },
+  {
+    "model_name": "Swallow-70b-instruct-v0.1",
+    "backend": "huggingface_local",
+    "huggingface_id": "tokyotech-llm/Swallow-70b-instruct-v0.1",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-12-19",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "Phi-3-mini-128k-instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "microsoft/Phi-3-mini-128k-instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|endoftext\\|>",
+    "release_date": "2024-04-22",
+    "open_weight": true,
+    "parameters": "3.8B"
+  },
+  {
+    "model_name": "Starling-LM-7B-beta",
+    "backend": "huggingface_local",
+    "huggingface_id": "Nexusflow/Starling-LM-7B-beta",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|end_of_turn\\|>",
+    "release_date": "2024-03-19",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Qwen2-7B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen2-7B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-06-04",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Qwen2-72B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen2-72B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-05-28",
+    "open_weight": true,
+    "parameters": "72B"
+  },
+  {
+    "model_name": "Llama-3-SauerkrautLM-70b-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-04-24",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "aya-23-8B",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "CohereForAI/aya-23-8B",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
+    "release_date": "2024-05-19",
+    "open_weight": true,
+    "parameters": "8B"
+  },
+  {
+    "model_name": "aya-23-35B",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "CohereForAI/aya-23-35B",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
+    "release_date": "2024-05-19",
+    "open_weight": true,
+    "parameters": "35B"
+  },
+  {
+    "model_name": "gemma-2-9b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/gemma-2-9b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>\n*",
+    "release_date": "2024-06-24",
+    "open_weight": true,
+    "parameters": "9B"
+  },
+  {
+    "model_name": "gemma-2-27b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/gemma-2-27b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>\n*",
+    "release_date": "2024-06-24",
+    "open_weight": true,
+    "parameters": "27B"
+  },
+  {
+    "model_name": "llama-2-7b-chat-hf",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "meta-llama/llama-2-7b-chat-hf",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-07-18",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "llama-2-13b-chat-hf",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "meta-llama/llama-2-13b-chat-hf",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-07-18",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "llama-2-70b-chat-hf",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "meta-llama/llama-2-70b-chat-hf",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2023-07-18",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "gemma-7b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/gemma-7b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>",
+    "release_date": "2024-02-21",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "gemma-1.1-2b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/gemma-1.1-2b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>",
+    "release_date": "2024-03-26",
+    "open_weight": true,
+    "parameters": "2B"
+  },
+  {
+    "model_name": "gemma-1.1-7b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/gemma-1.1-7b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>",
+    "release_date": "2024-03-26",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "codegemma-7b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/codegemma-7b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>",
+    "release_date": "2024-04-09",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "recurrentgemma-2b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/recurrentgemma-2b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>",
+    "release_date": "2024-04-09",
+    "open_weight": true,
+    "parameters": "2B"
+  },
+  {
+    "model_name": "gemma-2-2b-it",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "google/gemma-2-2b-it",
+    "premade_chat_template": true,
+    "eos_to_cull": "<end_of_turn>",
+    "release_date": "2024-07-16",
+    "open_weight": true,
+    "parameters": "2B"
+  },
+  {
+    "model_name": "Meta-Llama-3.1-8B-Instruct",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-07-23",
+    "open_weight": true,
+    "parameters": "8B"
+  },
+  {
+    "model_name": "Meta-Llama-3.1-70B-Instruct",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-07-23",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "Mistral-Large-Instruct-2407",
+    "backend": "huggingface_local",
+    "requires_api_key": true,
+    "huggingface_id": "mistralai/Mistral-Large-Instruct-2407",
+    "premade_chat_template": true,
+    "eos_to_cull": "</s>",
+    "release_date": "2024-07-24",
+    "open_weight": true,
+    "parameters": "123B"
+  },
+  {
+    "model_name": "Qwen2.5-7B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen2.5-7B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-07-24",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Qwen2.5-14B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen2.5-14B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-07-24",
+    "open_weight": true,
+    "parameters": "14B"
+  },
+  {
+    "model_name": "Qwen2.5-32B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen2.5-32B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-07-24",
+    "open_weight": true,
+    "parameters": "32B"
+  },
+  {
+    "model_name": "Qwen2.5-72B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "Qwen/Qwen2.5-72B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-07-24",
+    "open_weight": true,
+    "parameters": "72B"
+  },
+  {
+    "model_name": "Llama-3.2-1B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "meta-llama/Llama-3.2-1B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-09-18",
+    "open_weight": false,
+    "parameters": "1B"
+  },
+  {
+    "model_name": "Llama-3.2-3B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "meta-llama/Llama-3.2-3B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-09-18",
+    "open_weight": false,
+    "parameters": "3B"
+  },
+  {
+    "model_name": "EuroLLM-1.7B-Instruct",
+    "backend": "huggingface_local",
+    "huggingface_id": "utter-project/EuroLLM-1.7B-Instruct",
+    "premade_chat_template": true,
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-09-24",
+    "open_weight": true,
+    "parameters": "1.7B"
+  },
+  {
+    "model_name": "Qwen1.5-0.5B-Chat-GGUF-q8",
+    "backend": "llamacpp",
+    "huggingface_id": "Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    "filename": "*q8_0.gguf",
+    "premade_chat_template": true,
+    "bos_string": "",
+    "eos_string": "<|im_end|>",
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-02-03",
+    "open_weight": true,
+    "parameters": "0.5B"
+  },
+  {
+    "model_name": "CapybaraHermes-2.5-Mistral-7B-GGUF-q4",
+    "backend": "llamacpp",
+    "huggingface_id": "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
+    "filename": "*Q4_0.gguf",
+    "premade_chat_template": true,
+    "bos_string": "<s>",
+    "eos_string": "<|im_end|>",
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-01-31",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "CapybaraHermes-2.5-Mistral-7B-GGUF-q5",
+    "backend": "llamacpp",
+    "huggingface_id": "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
+    "filename": "*Q5_0.gguf",
+    "premade_chat_template": true,
+    "bos_string": "<s>",
+    "eos_string": "<|im_end|>",
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-01-31",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "CapybaraHermes-2.5-Mistral-7B-GGUF-q5-k-s",
+    "backend": "llamacpp",
+    "huggingface_id": "TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
+    "filename": "*Q5_K_S.gguf",
+    "premade_chat_template": true,
+    "bos_string": "<s>",
+    "eos_string": "<|im_end|>",
+    "eos_to_cull": "<\\|im_end\\|>",
+    "release_date": "2024-01-31",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "EstopianMaid-13B-GGUF-q2-k",
+    "backend": "llamacpp",
+    "huggingface_id": "TheBloke/EstopianMaid-13B-GGUF",
+    "filename": "*Q2_K.gguf",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '\\n\\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{% if system_message %}{{ bos_token + system_message }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{bos_token + '### Instruction:\\n' + message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'].strip() + eos_token + '\\n\\n' }}{% endif %}{% if loop.last and message['role'] == 'user' and add_generation_prompt %}{{ '### Response:\\n' }}{% endif %}{% endfor %}",
+    "bos_string": "<s>",
+    "eos_string": "</s>",
+    "eos_to_cull": "</s>",
+    "release_date": "2024-01-26",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "EstopianMaid-13B-GGUF-q3-k-s",
+    "backend": "llamacpp",
+    "huggingface_id": "TheBloke/EstopianMaid-13B-GGUF",
+    "filename": "*Q3_K_S.gguf",
+    "premade_chat_template": false,
+    "custom_chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'].strip() + '\\n\\n' %}{% else %}{% set loop_messages = messages %}{% set system_message = '' %}{% endif %}{% if system_message %}{{ bos_token + system_message }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{bos_token + '### Instruction:\\n' + message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'].strip() + eos_token + '\\n\\n' }}{% endif %}{% if loop.last and message['role'] == 'user' and add_generation_prompt %}{{ '### Response:\\n' }}{% endif %}{% endfor %}",
+    "bos_string": "<s>",
+    "eos_string": "</s>",
+    "eos_to_cull": "</s>",
+    "release_date": "2024-01-26",
+    "open_weight": true,
+    "parameters": "13B"
+  },
+  {
+    "model_name": "openchat_3.5-GGUF-q5",
+    "backend": "llamacpp",
+    "huggingface_id": "TheBloke/openchat_3.5-GGUF",
+    "filename": "*Q5_0.gguf",
+    "premade_chat_template": false,
+    "custom_chat_template": "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
+    "bos_string": "<s>",
+    "eos_string": "<|end_of_turn|>",
+    "eos_to_cull": "<\\|end_of_turn\\|>",
+    "release_date": "2023-11-02",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
+    "model_name": "Meta-Llama-3-70B-Instruct-GGUF-q4",
+    "backend": "llamacpp",
+    "huggingface_id": "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF",
+    "filename": "*Q4_K_M.gguf",
+    "premade_chat_template": true,
+    "bos_string": "<|begin_of_text|>",
+    "eos_string": "<|eot_id|>",
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-04-18",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "Meta-Llama-3-70B-Instruct-GGUF-q8",
+    "backend": "llamacpp",
+    "huggingface_id": "MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF",
+    "filename": "*Q8_0-00001-of-00002.gguf",
+    "additional_files": [
+      "*Q8_0-00002-of-00002.gguf"
+    ],
+    "premade_chat_template": true,
+    "bos_string": "<|begin_of_text|>",
+    "eos_string": "<|eot_id|>",
+    "eos_to_cull": "<\\|eot_id\\|>",
+    "release_date": "2024-04-18",
+    "open_weight": true,
+    "parameters": "70B"
+  },
+  {
+    "model_name": "c4ai-command-r-plus-GGUF-q4",
+    "backend": "llamacpp",
+    "huggingface_id": "pmysl/c4ai-command-r-plus-GGUF",
+    "filename": "*Q4_K_M-00001-of-00002.gguf",
+    "additional_files": [
+      "*Q4_K_M-00002-of-00002.gguf"
+    ],
+    "premade_chat_template": true,
+    "bos_string": "<BOS_TOKEN>",
+    "eos_string": "<|END_OF_TURN_TOKEN|>",
+    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
+    "release_date": "2024-04-04",
+    "open_weight": true,
+    "parameters": "104B"
+  },
+  {
+    "model_name": "c4ai-command-r-plus-GGUF-q8",
+    "backend": "llamacpp",
+    "huggingface_id": "pmysl/c4ai-command-r-plus-GGUF",
+    "filename": "*Q8_0-00001-of-00003.gguf",
+    "additional_files": [
+      "*Q8_0-00002-of-00003.gguf",
+      "*Q8_0-00003-of-00003.gguf"
+    ],
+    "premade_chat_template": true,
+    "bos_string": "<BOS_TOKEN>",
+    "eos_string": "<|END_OF_TURN_TOKEN|>",
+    "eos_to_cull": "<\\|END_OF_TURN_TOKEN\\|>",
+    "release_date": "2024-04-04",
+    "open_weight": true,
+    "parameters": "104B"
+  },
+  {
+    "model_name": "bakLlava-v1-hf",
     "backend": "huggingface_multimodal",
-    "huggingface_id": "llava-hf/llava-1.5-7b-hf",
-    "model_type": "Vision2Seq",
+    "huggingface_id": "llava-hf/bakLlava-v1-hf",
+    "model_class": "transformers.LlavaForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
     "output_split_prefix": "ASSISTANT:",
-    "custom_chat_template": "{%- for message in messages -%}{% if message['role'] == 'user' %}{% if message['image'] %}\nUSER: <image>\n{{message['content']}}{% else %}\nUSER:\n{{message['content']}}{% endif %}{% elif message['role'] == 'assistant' %}\nASSISTANT:{{message['content']}}{% endif %}{% endfor %}\nASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
     "release_date": "2024-01-03",
     "open_weight": true,
     "parameters": "7B"
-  },
-  {
+  },
+  {
+    "model_name": "llava-1.5-7b-hf",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "llava-hf/llava-1.5-7b-hf",
+    "model_class": "transformers.LlavaForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-01-03",
+    "open_weight": true,
+    "parameters": "7B"
+  },
+  {
     "model_name": "llava-1.5-13b-hf",
     "backend": "huggingface_multimodal",
     "huggingface_id": "llava-hf/llava-1.5-13b-hf",
-    "model_type": "Vision2Seq",
+    "model_class": "transformers.LlavaForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
    "output_split_prefix": "ASSISTANT:",
-    "custom_chat_template": "{%- for message in messages -%}{% if message['role'] == 'user' %}{% if message['image'] %}\nUSER: <image>\n{{message['content']}}{% else %}\nUSER:\n{{message['content']}}{% endif %}{% elif message['role'] == 'assistant' %}\nASSISTANT:{{message['content']}}{% endif %}{% endfor %}\nASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
     "release_date": "2024-01-03",
     "open_weight": true,
     "parameters": "13B"
-  },
-  {
+  },
+  {
+    "model_name": "llama3-llava-next-8b-hf",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "llava-hf/llama3-llava-next-8b-hf",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-07-19",
+    "open_weight": true,
+    "parameters": "8B"
+  },
+  {
+    "model_name": "llava-next-110b-hf",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "llava-hf/llava-next-110b-hf",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-07-19",
+    "open_weight": true,
+    "parameters": "110B"
+  },
+  {
+    "model_name": "llava-next-72b-hf",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "llava-hf/llava-next-72b-hf",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-07-19",
+    "open_weight": true,
+    "parameters": "72B"
+  },
+  {
     "model_name": "llava-v1.6-34b-hf",
     "backend": "huggingface_multimodal",
     "huggingface_id": "llava-hf/llava-v1.6-34b-hf",
-    "model_type": "Vision2Seq",
-    "output_split_prefix": "assistant",
-    "not_fast": true,
-    "padding": true,
-    "custom_chat_template": "<|im_start|>system\nAnswer the questions.<|im_end|>{%- for message in messages -%}{% if message['role'] == 'user' %}{% if message['image']%}<|im_start|>user\n<image>\n{{message['content']}}<|im_end|>{% else %}<|im_start|>\nuser\n{{message['content']}}<|im_end|>{% endif %}{% elif message['role'] == 'assistant' %}<|im_start|>assistant\n{{message['content']}}<|im_end|>{% endif %}{% endfor %}<|im_start|>assistant\n",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
     "release_date": "2024-03-17",
     "open_weight": true,
     "parameters": "34B"
-  },
-  {
+  },
+  {
     "model_name": "llava-v1.6-mistral-7b-hf",
     "backend": "huggingface_multimodal",
     "huggingface_id": "llava-hf/llava-v1.6-mistral-7b-hf",
-    "model_type": "Vision2Seq",
-    "output_split_prefix": "[/INST]",
-    "padding": true,
-    "custom_chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{% if message['image']%}[INST] <image>\n{{message['content']}} [/INST]{% else %}[INST]\n{{message['content']}} [/INST]{% endif %}{% elif message['role'] == 'assistant' %}{{message['content']}}{% endif %}{% endfor %}",
-    "release_date": "2024-02-20",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-03-17",
     "open_weight": true,
     "parameters": "7B"
-  },
-  {
+  },
+  {
+    "model_name": "llava-v1.6-vicuna-7b-hf",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "llava-hf/llava-v1.6-vicuna-7b-hf",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-03-17",
     "open_weight": true,
     "parameters": "7B"
-  },
-  {
+  },
+  {
     "model_name": "llava-v1.6-vicuna-13b-hf",
     "backend": "huggingface_multimodal",
     "huggingface_id": "llava-hf/llava-v1.6-vicuna-13b-hf",
-    "model_type": "Vision2Seq",
+    "model_class": "transformers.LlavaNextForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto",
+      "torch_dtype":"auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
     "output_split_prefix": "ASSISTANT:",
-    "padding": true,
-    "custom_chat_template": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.{% for message in messages %}{% if message['role'] == 'user' %}{% if message['image'] %}USER: <image>\n{{message['content']}}{% else %}USER:\n{{message['content']}}{% endif %}{% elif message['role'] == 'assistant' %}ASSISTANT:{{message['content']}}{% endif %}{% endfor %}ASSISTANT:",
+    "do_sample": false,
+    "supports_multiple_images": true,
     "release_date": "2024-03-17",
     "open_weight": true,
     "parameters": "13B"
-  },
-  {
-    "model_name": "llava-v1.6-vicuna-7b-hf",
+  },
+  {
+    "model_name": "llava-onevision-qwen2-7b-ov-chat-hf",
     "backend": "huggingface_multimodal",
-    "huggingface_id": "llava-hf/llava-v1.6-vicuna-7b-hf",
-    "model_type": "Vision2Seq",
-    "output_split_prefix": "ASSISTANT:",
-    "padding": true,
-    "custom_chat_template": "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.{% for message in messages %}{% if message['role'] == 'user' %}{% if message['image'] %}USER: <image>\n{{message['content']}}{% else %}USER:\n{{message['content']}}{% endif %}{% elif message['role'] == 'assistant' %}ASSISTANT:{{message['content']}}{% endif %}{% endfor %}ASSISTANT:",
-    "release_date": "2024-03-17",
+    "huggingface_id": "llava-hf/llava-onevision-qwen2-7b-ov-chat-hf",
+    "model_class": "transformers.LlavaOnevisionForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "assistant:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-09-16",
     "open_weight": true,
     "parameters": "7B"
-  },
-  {
+  },
+  {
+    "model_name": "llava-onevision-qwen2-72b-ov-chat-hf",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "llava-hf/llava-onevision-qwen2-72b-ov-chat-hf",
+    "model_class": "transformers.LlavaOnevisionForConditionalGeneration",
+    "model_config": {
+      "low_cpu_mem_usage": true,
+      "device_map": "auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_llava_prompt_text",
+    "response": "backends.multimodal_utils.generate_llava_response",
+    "output_split_prefix": "assistant:",
+    "do_sample": false,
+    "supports_multiple_images": true,
+    "release_date": "2024-09-16",
+    "open_weight": true,
+    "parameters": "72B"
+  },
+  {
     "model_name": "idefics-80b-instruct",
     "backend": "huggingface_multimodal",
     "huggingface_id": "HuggingFaceM4/idefics-80b-instruct",
-    "model_type": "Idefics",
+    "model_class": "transformers.IdeficsForVisionText2Text",
+    "model_config": {
+      "torch_dtype":"auto",
+      "device_map": "auto"
+    },
+    "processor_class": "transformers.AutoProcessor",
+    "processor_config": {},
+    "prompt": "backends.multimodal_utils.generate_idefics_prompt_text",
+    "response": "backends.multimodal_utils.generate_idefics_response",
     "eos_to_cull": "",
     "output_split_prefix": "Assistant:",
     "supports_multiple_images": true,
     "release_date": "2023-07-24",
     "open_weight": true,
     "parameters": "80B"
-  },
-  {
-    "model_name": "idefics-9b-instruct",
+  },
+  {
+    "model_name": "idefics-9b-instruct",
+    "backend": "huggingface_multimodal",
+    "huggingface_id": "HuggingFaceM4/idefics-9b-instruct",
+    "model_class": "transformers.IdeficsForVisionText2Text",
+    "model_config": {
+
"torch_dtype":"auto", + "device_map": "auto" + }, + "processor_class": "transformers.AutoProcessor", + "processor_config": {}, + "prompt": "backends.multimodal_utils.generate_idefics_prompt_text", + "response": "backends.multimodal_utils.generate_idefics_response", + "eos_to_cull": "", + "output_split_prefix": "Assistant:", + "supports_multiple_images": true, + "release_date": "2023-07-24", + "open_weight": true, + "parameters": "9B" + }, + { + "model_name": "InternVL2-Llama3-76B", "backend": "huggingface_multimodal", - "huggingface_id": "HuggingFaceM4/idefics-9b-instruct", - "model_type": "Idefics", - "eos_to_cull": "", + "huggingface_id": "OpenGVLab/InternVL2-Llama3-76B", + "model_class": "transformers.AutoModel", + "model_config": { + "torch_dtype": "auto", + "load_in_8bit": true, + "low_cpu_mem_usage": true, + "use_flash_attn": true, + "device_map": "backends.multimodal_utils.split_model" + }, + "processor_class": "transformers.AutoTokenizer", + "processor_config": { + "use_fast": false + }, + "prompt": "backends.multimodal_utils.generate_internvl2_prompt_text", + "response": "backends.multimodal_utils.generate_internvl2_response", + "trust_remote_code": true, + "do_sample": false, + "supports_multiple_images": true, + "release_date": "2024-07-15", + "open_weight": true, + "parameters": "76B" + }, + { + "model_name": "InternVL2-40B", + "backend": "huggingface_multimodal", + "huggingface_id": "OpenGVLab/InternVL2-40B", + "model_class": "transformers.AutoModel", + "model_config": { + "torch_dtype": "auto", + "load_in_8bit": true, + "low_cpu_mem_usage": true, + "use_flash_attn": true, + "device_map": "backends.multimodal_utils.split_model" + }, + "processor_class": "transformers.AutoTokenizer", + "processor_config": { + "use_fast": false + }, + "prompt": "backends.multimodal_utils.generate_internvl2_prompt_text", + "response": "backends.multimodal_utils.generate_internvl2_response", + "trust_remote_code": true, + "do_sample": false, + "supports_multiple_images": true, + "release_date": "2024-07-15", + "open_weight": true, + "parameters": "40B" + }, + { + "model_name": "InternVL2-26B", + "backend": "huggingface_multimodal", + "huggingface_id": "OpenGVLab/InternVL2-26B", + "model_class": "transformers.AutoModel", + "model_config": { + "torch_dtype": "auto", + "load_in_8bit": true, + "low_cpu_mem_usage": true, + "use_flash_attn": true, + "device_map": "backends.multimodal_utils.split_model" + }, + "processor_class": "transformers.AutoTokenizer", + "processor_config": { + "use_fast": false + }, + "prompt": "backends.multimodal_utils.generate_internvl2_prompt_text", + "response": "backends.multimodal_utils.generate_internvl2_response", + "trust_remote_code": true, + "do_sample": false, + "supports_multiple_images": true, + "release_date": "2024-07-15", + "open_weight": true, + "parameters": "26B" + }, + { + "model_name": "InternVL2-8B", + "backend": "huggingface_multimodal", + "huggingface_id": "OpenGVLab/InternVL2-8B", + "model_class": "transformers.AutoModel", + "model_config": { + "torch_dtype": "auto", + "load_in_8bit": true, + "low_cpu_mem_usage": true, + "use_flash_attn": true, + "device_map": "backends.multimodal_utils.split_model" + }, + "processor_class": "transformers.AutoTokenizer", + "processor_config": { + "use_fast": false + }, + "prompt": "backends.multimodal_utils.generate_internvl2_prompt_text", + "response": "backends.multimodal_utils.generate_internvl2_response", + "trust_remote_code": true, + "do_sample": false, + "supports_multiple_images": true, + "release_date": 
"2024-07-15", + "open_weight": true, + "parameters": "8B" + }, + { + "model_name": "internlm-xcomposer2d5-7b", + "backend": "huggingface_multimodal", + "huggingface_id": "internlm/internlm-xcomposer2d5-7b", + "automodel_type": "transformers.AutoModel", + "model_class": "backends.multimodal_utils.intern_utils.InternlmMLLM", + "use_tokenizer": true, + "supports_multiple_images": true, + "trust_remote_code": true, + "use_bf16": true, + "output_split_prefix": "", + "not_distributed": true, + "release_date": "2024-07-02", + "open_weight": true, + "parameters": "7B" + }, + { + "model_name": "Idefics3-8B-Llama3", + "backend": "huggingface_multimodal", + "huggingface_id": "HuggingFaceM4/Idefics3-8B-Llama3", + "model_class": "transformers.AutoModelForVision2Seq", + "model_config": { + "torch_dtype": "auto", + "device_map": "auto" + }, + "processor_class": "transformers.AutoProcessor", + "processor_config": {}, + "prompt": "backends.multimodal_utils.generate_llava_prompt_text", + "response": "backends.multimodal_utils.generate_llava_response", + "supports_multiple_images": true, + "trust_remote_code": true, "output_split_prefix": "Assistant:", + "release_date": "2024-08-05", + "open_weight": true, + "parameters": "8B" + }, + { + "model_name": "dolphin-vision-72b", + "backend": "huggingface_multimodal", + "huggingface_id": "cognitivecomputations/dolphin-vision-72b", + "automodel_type": "transformers.AutoModelForCausalLM", + "model_class": "backends.multimodal_utils.dolphin_utils.DolphinMLLM", + "use_tokenizer": true, "supports_multiple_images": true, - "release_date": "2023-07-24", + "trust_remote_code": true, + "use_bf16": true, + "output_split_prefix": "", + "low_cpu_mem_usage": true, + "release_date": "2024-06-28", "open_weight": true, - "parameters": "9B" - }, - { - "model_name": "InternVL2-Llama3-76B", - "backend": "huggingface_multimodal", - "huggingface_id": "OpenGVLab/InternVL2-Llama3-76B", - "automodel_type": "transformers.AutoModel", - "model_class": "backends.multimodal_utils.internvl_utils.InternvlMLLM", - "use_tokenizer": true, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": true, - "use_fast": false, - "output_split_prefix": "Assistant:", - "load_in_8bit": false, - "low_cpu_mem_usage": true, - "custom_device_map": true, - "device_map": "backends.multimodal_utils.internvl_utils.split_model", - "release_date": "2024-07-15", - "open_weight": true, - "parameters": "76B" - }, - { - "model_name": "InternVL2-40B", - "backend": "huggingface_multimodal", - "huggingface_id": "OpenGVLab/InternVL2-40B", - "automodel_type": "transformers.AutoModel", - "model_class": "backends.multimodal_utils.internvl_utils.InternvlMLLM", - "use_tokenizer": true, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": true, - "use_fast": false, - "output_split_prefix": "Assistant:", - "load_in_8bit": false, - "low_cpu_mem_usage": true, - "custom_device_map": true, - "device_map": "backends.multimodal_utils.internvl_utils.split_model", - "release_date": "2024-07-15", - "open_weight": true, - "parameters": "40B" - }, - { - "model_name": "InternVL2-26B", - "backend": "huggingface_multimodal", - "huggingface_id": "OpenGVLab/InternVL2-26B", - "automodel_type": "transformers.AutoModel", - "model_class": "backends.multimodal_utils.internvl_utils.InternvlMLLM", - "use_tokenizer": true, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": true, - "use_fast": false, - "output_split_prefix": "Assistant:", - "load_in_8bit": false, - 
"low_cpu_mem_usage": true, - "custom_device_map": true, - "device_map": "backends.multimodal_utils.internvl_utils.split_model", - "release_date": "2024-07-15", - "open_weight": true, - "parameters": "26B" - }, - { - "model_name": "InternVL2-8B", - "backend": "huggingface_multimodal", - "huggingface_id": "OpenGVLab/InternVL2-8B", - "automodel_type": "transformers.AutoModel", - "model_class": "backends.multimodal_utils.internvl_utils.InternvlMLLM", - "use_tokenizer": true, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": true, - "use_fast": false, - "output_split_prefix": "Assistant:", - "load_in_8bit": false, - "low_cpu_mem_usage": true, - "custom_device_map": true, - "device_map": "backends.multimodal_utils.internvl_utils.split_model", - "release_date": "2024-07-15", - "open_weight": true, - "parameters": "8B" - }, - { - "model_name": "internlm-xcomposer2d5-7b", - "backend": "huggingface_multimodal", - "huggingface_id": "internlm/internlm-xcomposer2d5-7b", - "automodel_type": "transformers.AutoModel", - "model_class": "backends.multimodal_utils.intern_utils.InternlmMLLM", - "use_tokenizer": true, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": true, - "output_split_prefix": "", - "not_distributed": true, - "release_date": "2024-07-02", - "open_weight": true, - "parameters": "7B" - }, - { - "model_name": "Idefics3-8B-Llama3", - "backend": "huggingface_multimodal", - "huggingface_id": "HuggingFaceM4/Idefics3-8B-Llama3", - "automodel_type": "transformers.AutoModelForVision2Seq", - "model_class": "backends.multimodal_utils.idefics3_utils.Idefics3MLLM", - "use_tokenizer": false, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": false, - "output_split_prefix": "Assistant:", - "low_cpu_mem_usage": true, - "release_date": "2024-08-05", - "open_weight": true, - "parameters": "8B" - }, - { - "model_name": "dolphin-vision-72b", - "backend": "huggingface_multimodal", - "huggingface_id": "cognitivecomputations/dolphin-vision-72b", - "automodel_type": "transformers.AutoModelForCausalLM", - "model_class": "backends.multimodal_utils.dolphin_utils.DolphinMLLM", - "use_tokenizer": true, - "supports_multiple_images": true, - "trust_remote_code": true, - "use_bf16": true, - "output_split_prefix": "", - "low_cpu_mem_usage": true, - "release_date": "2024-06-28", - "open_weight": true, - "parameters": "72B" - }, - { - "model_name": "Phi-3.5-vision-instruct", - "backend": "huggingface_multimodal", - "huggingface_id": "microsoft/Phi-3.5-vision-instruct", - "automodel_type": "transformers.AutoModelForCausalLM", - "model_class": "backends.multimodal_utils.phi_utils.PhiMLLM", - "supports_multiple_images": true, - "trust_remote_code": true, - "output_split_prefix": "", - "low_cpu_mem_usage": true, - "release_date": "2024-08-17", - "open_weight": true, - "parameters": "4B" - }, - { - "model_name": "Pixtral-12B-2409", - "backend": "huggingface_multimodal", - "huggingface_id": "mistralai/Pixtral-12B-2409", - "automodel_type": "vllm.LLM", - "model_class": "backends.multimodal_utils.pixtral_utils.PixtralMLLM", - "supports_multiple_images": true, - "trust_remote_code": false, - "output_split_prefix": "", - "low_cpu_mem_usage": true, - "release_date": "2024-09-11", - "open_weight": true, - "parameters": "12B", - "use_vllm": true, - "tokenizer_mode": "mistral", - "vllm_context": 8192 - } -] \ No newline at end of file + "parameters": "72B" + }, + { + "model_name": "Phi-3.5-vision-instruct", + "backend": "huggingface_multimodal", + 
"huggingface_id": "microsoft/Phi-3.5-vision-instruct", + "automodel_type": "transformers.AutoModelForCausalLM", + "model_class": "backends.multimodal_utils.phi_utils.PhiMLLM", + "supports_multiple_images": true, + "trust_remote_code": true, + "output_split_prefix": "", + "low_cpu_mem_usage": true, + "release_date": "2024-08-17", + "open_weight": true, + "parameters": "4B" + }, + { + "model_name": "Pixtral-12B-2409", + "backend": "huggingface_multimodal", + "huggingface_id": "mistralai/Pixtral-12B-2409", + "automodel_type": "vllm.LLM", + "model_class": "backends.multimodal_utils.pixtral_utils.PixtralMLLM", + "supports_multiple_images": true, + "trust_remote_code": false, + "output_split_prefix": "", + "low_cpu_mem_usage": true, + "release_date": "2024-09-11", + "open_weight": true, + "parameters": "12B", + "use_vllm": true, + "tokenizer_mode": "mistral", + "vllm_context": 8192 + }, + { + "model_name": "Meta-Llama-3.1-70B-Instruct-FP8-neuralmagic-1gpu", + "backend": "vllm", + "huggingface_id": "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8", + "number_gpus": 1, + "premade_chat_template": true, + "eos_to_cull": "<\\|eot_id\\|>", + "release_date": "2023-07-23", + "open_weight": true, + "parameters": "70B" + }, + { + "model_name": "Meta-Llama-3.1-70B-Instruct-FP8-neuralmagic-1gpu-4k", + "backend": "vllm", + "huggingface_id": "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8", + "number_gpus": 1, + "context_limit": 4096, + "premade_chat_template": true, + "eos_to_cull": "<\\|eot_id\\|>", + "release_date": "2023-07-23", + "open_weight": true, + "parameters": "70B" + }, + { + "model_name": "Meta-Llama-3.1-70B-Instruct-FP8-neuralmagic-1gpu-8k", + "backend": "vllm", + "huggingface_id": "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8", + "number_gpus": 1, + "context_limit": 8192, + "premade_chat_template": true, + "eos_to_cull": "<\\|eot_id\\|>", + "release_date": "2023-07-23", + "open_weight": true, + "parameters": "70B" + }, + { + "model_name": "Meta-Llama-3.1-70B-Instruct-FP8-neuralmagic-2gpu", + "backend": "vllm", + "huggingface_id": "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8", + "number_gpus": 2, + "premade_chat_template": true, + "eos_to_cull": "<\\|eot_id\\|>", + "release_date": "2023-07-23", + "open_weight": true, + "parameters": "70B" + } + ] \ No newline at end of file diff --git a/clemcore/backends/multimodal_utils.py b/clemcore/backends/multimodal_utils.py new file mode 100644 index 0000000000..cbd6572036 --- /dev/null +++ b/clemcore/backends/multimodal_utils.py @@ -0,0 +1,533 @@ +""" +Util functions for multimodal models. +""" + +from typing import List, Dict, Tuple, Any +import math +import numpy as np +import torch +import torchvision.transforms as T +from PIL import Image +from torchvision.transforms.functional import InterpolationMode +from transformers.image_utils import load_image +import requests +from io import BytesIO +import logging + +logger = logging.getLogger(__name__) + +""" +##### INTERNVL2 TYPE MODELS ##### +""" + +IMAGENET_MEAN = (0.485, 0.456, 0.406) +IMAGENET_STD = (0.229, 0.224, 0.225) + + +def generate_history_internvl2(messages: List[str]) -> Tuple[List[Tuple], str]: + """ + Separates the history and query from the list of messages in the current game instance. + Compatible with InternVL2 and Nvidia NVLM models. + + Args: + messages: A list containing user messages, system messages or assistant responses. + + Returns: + A list of tuples containing the history and a user message string, passed to the model in the current game instance. 
+ + Raises: + ValueError: if msg['role'] is other than 'user', 'system', or 'assistant'. + """ + + history = [] + for msg in messages: + if msg['role'] == 'system': + continue # Skip the system message; it is not passed to the model. Ref - https://huggingface.co/OpenGVLab/InternVL2-40B + elif msg['role'] == 'user': + if 'image' in msg: + user_message = f"<image>\n{msg['content']}" # Prepend the <image> token if an image is passed in this instance. + else: + user_message = msg['content'] + elif msg['role'] == 'assistant': + history.append((user_message, msg['content'])) + else: + raise ValueError(f"Invalid role: {msg['role']}. Expected 'user', 'system', or 'assistant'.") + + return history, user_message + + +def split_model(model_name): + """ + Splits the model across available GPUs based on the model name. + + Args: + model_name (str): The name of the model to be split. + Expected values include 'InternVL2-1B', 'InternVL2-2B', + 'InternVL2-4B', 'InternVL2-8B', 'InternVL2-26B', + 'InternVL2-40B', 'InternVL2-Llama3-76B'. + + Returns: + dict: A mapping of model layers to GPU indices. + """ + device_map = {} + world_size = torch.cuda.device_count() + num_layers = { + 'InternVL2-1B': 24, 'InternVL2-2B': 24, 'InternVL2-4B': 32, 'InternVL2-8B': 32, + 'InternVL2-26B': 48, 'InternVL2-40B': 60, 'InternVL2-Llama3-76B': 80}[model_name] + # Since the first GPU will be used for ViT, treat it as half a GPU. + num_layers_per_gpu = math.ceil(num_layers / (world_size - 0.5)) + num_layers_per_gpu = [num_layers_per_gpu] * world_size + num_layers_per_gpu[0] = math.ceil(num_layers_per_gpu[0] * 0.5) + layer_cnt = 0 + for i, num_layer in enumerate(num_layers_per_gpu): + for j in range(num_layer): + device_map[f'language_model.model.layers.{layer_cnt}'] = i + layer_cnt += 1 + device_map['vision_model'] = 0 + device_map['mlp1'] = 0 + device_map['language_model.model.tok_embeddings'] = 0 + device_map['language_model.model.embed_tokens'] = 0 + device_map['language_model.output'] = 0 + device_map['language_model.model.norm'] = 0 + device_map['language_model.lm_head'] = 0 + device_map[f'language_model.model.layers.{num_layers - 1}'] = 0 + + return device_map + +def build_transform(input_size): + """Builds a transformation pipeline for image preprocessing. + + Args: + input_size (int): The size to which the image will be resized. + + Returns: + torchvision.transforms.Compose: A composed transform for the image. + """ + MEAN, STD = IMAGENET_MEAN, IMAGENET_STD + transform = T.Compose([ + T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img), + T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC), + T.ToTensor(), + T.Normalize(mean=MEAN, std=STD) + ]) + return transform + +def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size): + """Finds the closest aspect ratio from a set of target ratios. + + Args: + aspect_ratio (float): The aspect ratio of the original image. + target_ratios (list): A list of target aspect ratios. + width (int): The width of the original image. + height (int): The height of the original image. + image_size (int): The size of the image for comparison. + + Returns: + tuple: The best aspect ratio found.
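+ + Example (illustrative; the candidate ratios here are hand-picked): + find_closest_aspect_ratio(16 / 9, [(1, 1), (2, 1), (16, 9)], 1920, 1080, 448) + # -> returns (16, 9), the candidate with the smallest difference to the image ratio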
+ """ + best_ratio_diff = float('inf') + best_ratio = (1, 1) + area = width * height + for ratio in target_ratios: + target_aspect_ratio = ratio[0] / ratio[1] + ratio_diff = abs(aspect_ratio - target_aspect_ratio) + if ratio_diff < best_ratio_diff: + best_ratio_diff = ratio_diff + best_ratio = ratio + elif ratio_diff == best_ratio_diff: + if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]: + best_ratio = ratio + return best_ratio + +def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False): + """Processes the image to fit the closest aspect ratio and splits it into blocks. + + Args: + image (PIL.Image): The image to be processed. + min_num (int): Minimum number of blocks. + max_num (int): Maximum number of blocks. + image_size (int): The size of the image. + use_thumbnail (bool): Whether to create a thumbnail. + + Returns: + list: A list of processed image blocks. + """ + orig_width, orig_height = image.size + aspect_ratio = orig_width / orig_height + + # calculate the existing image aspect ratio + target_ratios = set( + (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if + i * j <= max_num and i * j >= min_num) + target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1]) + + # find the closest aspect ratio to the target + target_aspect_ratio = find_closest_aspect_ratio( + aspect_ratio, target_ratios, orig_width, orig_height, image_size) + + # calculate the target width and height + target_width = image_size * target_aspect_ratio[0] + target_height = image_size * target_aspect_ratio[1] + blocks = target_aspect_ratio[0] * target_aspect_ratio[1] + + # resize the image + resized_img = image.resize((target_width, target_height)) + processed_images = [] + for i in range(blocks): + box = ( + (i % (target_width // image_size)) * image_size, + (i // (target_width // image_size)) * image_size, + ((i % (target_width // image_size)) + 1) * image_size, + ((i // (target_width // image_size)) + 1) * image_size + ) + # split the image + split_img = resized_img.crop(box) + processed_images.append(split_img) + assert len(processed_images) == blocks + if use_thumbnail and len(processed_images) != 1: + thumbnail_img = image.resize((image_size, image_size)) + processed_images.append(thumbnail_img) + return processed_images + +def load_internvl2_image(image_file, input_size=448, max_num=12): + """Loads an image file and applies transformations. + + Args: + image_file (str): The path to the image file. + input_size (int): The size to which the image will be resized. + max_num (int): Maximum number of blocks to create. + + Returns: + torch.Tensor: A tensor containing the pixel values of the processed images. + """ + if image_file.startswith("http"): + response = requests.get(image_file) + image = Image.open(BytesIO(response.content)).convert('RGB') + else: + image = Image.open(image_file).convert('RGB') + + transform = build_transform(input_size=input_size) + images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num) + pixel_values = [transform(image) for image in images] + pixel_values = torch.stack(pixel_values) + return pixel_values + +def get_internvl2_image(messages: List[str], device: str): + """ + Extracts the last user message containing image data and loads the corresponding images. + + Args: + messages (List[str]): A list of message dictionaries containing user, system, and assistant messages. 
device (str): The device to which the image tensors will be moved (e.g., 'cuda' or 'cpu'). + + Returns: + torch.Tensor: A tensor containing the pixel values of the processed images. + + Raises: + ValueError: If no user message is found. + """ + # Get last user message + last_user_message = next((msg for msg in reversed(messages) if msg['role'] == 'user'), None) + + if last_user_message is None: + raise ValueError("No user message found in the provided messages.") + + logger.info("*" * 50 + " Last User Message " + "*" * 50) + logger.info(f"\n : {last_user_message} \n") + + if 'image' in last_user_message: + logger.info("*" * 50 + " Images Number " + str(len(last_user_message['image'])) + " " + "*" * 50) + # Load all images and concatenate them into a single tensor + pixel_values = torch.cat( + [load_internvl2_image(img, max_num=12).to(torch.bfloat16).to(device) for img in last_user_message['image']] + , dim=0) + else: + pixel_values = None + logger.info("*" * 50 + " Pixel Values not found " + "*" * 50) + + return pixel_values + +def generate_internvl2_prompt_text(messages: List[str], **prompt_kwargs) -> str: + """Generates input text for the InternVL2 model from a list of messages. + + Args: + messages (List[str]): A list of message dictionaries containing user, system, and assistant messages. + + Returns: + str: The concatenated prompt text generated from the message history and the last user question. + """ + prompt_text = "" + history, question = generate_history_internvl2(messages=messages) + if history: + for t in history: + prompt_text += t[0] + t[1] + prompt_text += question + return prompt_text + +def generate_internvl2_response(**response_kwargs) -> str: + """Generates a response from the InternVL2 model based on the provided messages and configuration. + + Args: + **response_kwargs: A dictionary containing the following keys: + - messages (List[str]): A list of message dictionaries. + - device (str): The device to which the image tensors will be moved (e.g., 'cuda' or 'cpu'). + - max_tokens (int): The maximum number of tokens to generate. + - model: The model instance used for generating responses. + - processor: The processor instance used for processing images. + - do_sample (bool): Whether to sample during generation (False means greedy decoding). + + Returns: + str: The generated response from the model. + + Raises: + RuntimeError: If the model fails to generate a response. + """ + messages = response_kwargs['messages'] + device = response_kwargs['device'] + max_tokens = response_kwargs['max_tokens'] + model = response_kwargs['model'] + processor = response_kwargs['processor'] + do_sample = response_kwargs['do_sample'] + + images = get_internvl2_image(messages=messages, device=device) + history, question = generate_history_internvl2(messages=messages) + + logger.info("*" * 50 + " Question " + "*" * 50) + logger.info(f"\n : {question} \n") + + logger.info("*" * 50 + " History " + "*" * 50) + logger.info(f"\n : {history} \n") + + if not history: + history = None + generation_config = dict(max_new_tokens=max_tokens, do_sample=do_sample) + try: + generated_response, _ = model.chat(processor, images, question, generation_config, + history=history, return_history=True) + except Exception as e: + raise RuntimeError("Failed to generate response from the model.") from e + + return generated_response + + +""" +##### LLAVA TYPE MODELS ##### +Compatible models - LLaVA 1.5, LLaVA 1.6, Idefics3 +""" + +def generate_llava_messages(messages: List[str]) -> Tuple[List, List]: + """Generates LLAVA messages and image paths from a list of messages.
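+ + Each returned message follows the Hugging Face chat format, e.g. + {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "..."}]}, + with image paths/URLs collected in a separate list, in the same order as the + {"type": "image"} placeholders. If the final user message carries no image, a blank + white 128x128 PIL image is appended as a fallback.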
+ + Args: + messages (List[str]): A list of message dictionaries containing user, system, and assistant messages. + + Returns: + Tuple[List, List]: A tuple containing: + - A list of formatted LLAVA messages. + - A list of image paths extracted from the messages. + """ + llava_messages = [] + image_paths = [] + for message in messages: + message_dict = {} + message_dict['content'] = [] + + if message['role'] == 'user': + message_dict['role'] = 'user' + if 'image' in message: + if isinstance(message['image'], str): + # Single image + message_dict['content'].append({"type": "image"}) + image_paths.append(message['image']) + elif isinstance(message['image'], list): + # List of images + for img in message['image']: + message_dict['content'].append({"type": "image"}) + image_paths.append(img) + else: + raise ValueError("Invalid image type in message - should be str or List[str]") + + # Add user text message at the end + message_dict['content'].append({"type": "text", "text": message['content']}) + llava_messages.append(message_dict) + + elif message['role'] == 'assistant': + message_dict['role'] = 'assistant' + message_dict['content'].append({"type": "text", "text": message['content']}) + llava_messages.append(message_dict) + + elif message['role'] == 'system': + continue # Skip system message + else: + raise ValueError(f"Invalid role: {message['role']}. Expected 'user', 'system', or 'assistant'.") + + last_user_message = llava_messages[-1] + if last_user_message['role'] == 'user': + content = last_user_message['content'] + contains_image = any(val["type"] == "image" for val in content) + + if not contains_image: # Pass a blank image + blank_image = Image.new('RGB', (128, 128), color='white') + image_paths.append(blank_image) + llava_messages[-1]['content'].append({"type": "image"}) + + return llava_messages, image_paths + +def generate_llava_prompt_text(messages: List[str], **prompt_kwargs) -> str: + """Generates a prompt text for LLAVA from a list of messages. + + Args: + messages (List[str]): A list of message dictionaries containing user, system, and assistant messages. + **prompt_kwargs: Additional keyword arguments for processing. + + Returns: + str: The generated prompt text for LLAVA. + """ + llava_messages, _ = generate_llava_messages(messages=messages) + processor = prompt_kwargs['processor'] + prompt = processor.apply_chat_template(llava_messages, add_generation_prompt=True) + + return prompt + +def generate_llava_response(**response_kwargs) -> str: + """Generates a response from the LLAVA model based on the provided messages and configuration. + + Args: + **response_kwargs: A dictionary containing the following keys: + - messages (List[str]): A list of message dictionaries. + - device (str): The device to which the image tensors will be moved (e.g., 'cuda' or 'cpu'). + - max_tokens (int): The maximum number of tokens to generate. + - model: The model instance used for generating responses. + - processor: The processor instance used for processing images. + - do_sample (bool): Whether to sample during generation (False means greedy decoding). + + Returns: + str: The generated response from the LLAVA model. + + Raises: + RuntimeError: If the model fails to generate a response.
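+ + Example (illustrative call; model, processor and messages are placeholders): + response = generate_llava_response(messages=messages, device="cuda:0", max_tokens=100, + model=model, processor=processor, do_sample=False)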
+ """ + messages = response_kwargs['messages'] + device = response_kwargs['device'] + max_tokens = response_kwargs['max_tokens'] + model = response_kwargs['model'] + processor = response_kwargs['processor'] + do_sample = response_kwargs['do_sample'] + + llava_messages, image_paths = generate_llava_messages(messages=messages) + prompt = processor.apply_chat_template(llava_messages, add_generation_prompt=True) + + # Process images + processed_images = [] + for image in image_paths: + if type(image) == str: + processed_images.append(load_image(image)) + else: + processed_images.append(image) + + inputs = processor(images=processed_images, text=prompt, return_tensors='pt').to(device) + + try: + output = model.generate(**inputs, max_new_tokens=max_tokens, do_sample=do_sample) + response = processor.decode(output[0], skip_special_tokens=True) + except Exception as e: + raise RuntimeError("Failed to generate response from the LLAVA model.") from e + + return response + + +""" +##### IDEFICS TYPE MODELS ##### +""" + +def generate_idefics_prompt_text(messages: List[str], **prompt_kwargs) -> str: + """Generates a prompt text from a list of messages for the IDEFICS model. + + Args: + messages (List[str]): A list of message dictionaries containing user, system, and assistant messages. + **prompt_kwargs: Additional keyword arguments for processing. + + Returns: + str: The concatenated prompt text generated from the message history. + """ + prompt_text = "" + for msg in messages: + if msg['role'] == 'system': + continue # Skip system message. Ref - https://huggingface.co/HuggingFaceM4/idefics-9b-instruct + elif msg['role'] == 'user': + prompt_text += f" User: {msg['content']} " + if 'image' in msg: + if len(msg['image']) > 1: + for img in msg['image']: + prompt_text += img + else: + prompt_text += msg['image'][0] + prompt_text += "" + elif msg['role'] == 'assistant': + prompt_text += f" Assistant: {msg['content']} " + else: + raise ValueError(f"Invalid role: {msg['role']}. Expected 'user', 'system', or 'assistant'.") + + return prompt_text + +def generate_idefics_response(**response_kwargs) -> str: + """Generates a response from the IDEFICS model based on the provided messages and configuration. + + Args: + **response_kwargs: A dictionary containing the following keys: + - messages (List[str]): A list of message dictionaries. + - device (str): The device to which the image tensors will be moved (e.g., 'cuda' or 'cpu'). + - max_tokens (int): The maximum number of tokens to generate. + - model: The model instance used for generating responses. + - processor: The processor instance used for processing images. + + Returns: + str: The generated response from the IDEFICS model. + + Raises: + RuntimeError: If the model fails to generate a response. + """ + messages = response_kwargs['messages'] + device = response_kwargs['device'] + max_tokens = response_kwargs['max_tokens'] + model = response_kwargs['model'] + processor = response_kwargs['processor'] + + input_messages = [] + for msg in messages: + if msg['role'] == 'system': + continue # Skip system message. 
Ref - https://huggingface.co/HuggingFaceM4/idefics-9b-instruct + elif msg['role'] == 'user': + input_messages.append(f"\nUser: {msg['content']}") + if 'image' in msg: + if len(msg['image']) > 1: + for img in msg['image']: + loaded_image = load_image(img) + input_messages.append(loaded_image) + else: + loaded_image = load_image(msg['image'][0]) + input_messages.append(loaded_image) + input_messages.append("<end_of_utterance>") + elif msg['role'] == 'assistant': + input_messages.append(f"\nAssistant: {msg['content']} ") + else: + raise ValueError(f"Invalid role: {msg['role']}. Expected 'user', 'system', or 'assistant'.") + + # --batched mode + inputs = processor(input_messages, add_end_of_utterance_token=False, return_tensors="pt").to(device) + + # Generation args + exit_condition = processor.tokenizer("<end_of_utterance>", add_special_tokens=False).input_ids + bad_words_ids = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids + + try: + generated_ids = model.generate(**inputs, eos_token_id=exit_condition, bad_words_ids=bad_words_ids, max_length=max_tokens) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True) + except Exception as e: + raise RuntimeError("Failed to generate response from the IDEFICS model.") from e + + return generated_text[0] diff --git a/clemcore/backends/openai_compatible_api.py b/clemcore/backends/openai_compatible_api.py index b111c415e9..a11af202f6 100644 --- a/clemcore/backends/openai_compatible_api.py +++ b/clemcore/backends/openai_compatible_api.py @@ -52,7 +52,7 @@ def __init__(self, client: openai.OpenAI, model_spec: backends.ModelSpec): super().__init__(model_spec) self.client = client - @retry(tries=3, delay=0, logger=logger) + @retry(tries=3, delay=90, logger=logger) @ensure_messages_format def generate_response(self, messages: List[Dict]) -> Tuple[str, Any, str]: """Request a generated response from the OpenAI-compatible remote API.
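For context on the retry change above: the retry decorator (from the PyPI retry package) re-invokes the wrapped call up to tries times and sleeps delay seconds between attempts, so failed requests to OpenAI-compatible endpoints are now retried after 90 seconds instead of immediately. A minimal sketch of the equivalent control flow, assuming any exception should trigger a retry:

import time

def call_with_retries(fn, *args, tries=3, delay=90, **kwargs):
    # Rough equivalent of @retry(tries=3, delay=90): retry on any
    # exception, sleeping between attempts, re-raising the last failure.
    for attempt in range(tries):
        try:
            return fn(*args, **kwargs)
        except Exception:
            if attempt == tries - 1:
                raise
            time.sleep(delay)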
diff --git a/clemcore/clemgame/__init__.py b/clemcore/clemgame/__init__.py index 4d53dedcfa..5e9a135a64 100644 --- a/clemcore/clemgame/__init__.py +++ b/clemcore/clemgame/__init__.py @@ -5,13 +5,15 @@ import os.path import sys from datetime import datetime -from typing import List, Dict, Tuple, Any +from re import match +from typing import List, Dict, Tuple, Any, Union from tqdm import tqdm from types import SimpleNamespace import importlib import importlib.util import inspect import logging +import nltk import clemcore.backends as backends import clemcore.utils.file_utils as file_utils @@ -29,16 +31,18 @@ class GameSpec(SimpleNamespace): Holds all necessary information to play game in clembench (see README for list of attributes) """ - def __init__(self, **kwargs): + def __init__(self, allow_underspecified: bool = False, **kwargs): super().__init__(**kwargs) # check for required fields - if "game_name" not in self: - raise KeyError(f"No game name specified in entry {kwargs}") - if "game_path" not in self: - raise KeyError(f"No game path specified in {kwargs}") + if not allow_underspecified: + if "game_name" not in self: + raise KeyError(f"No game name specified in entry {kwargs}") + if "game_path" not in self: + raise KeyError(f"No game path specified in {kwargs}") # make game_path absolute - if not os.path.isabs(self.game_path): - self.game_path = os.path.join(file_utils.project_root(), self.game_path) + if hasattr(self, 'game_path'): + if not os.path.isabs(self.game_path): + self.game_path = os.path.join(file_utils.project_root(), self.game_path) def __repr__(self): """Returns string representation of this GameSpec.""" @@ -68,7 +72,7 @@ def __contains__(self, attribute): return hasattr(self, attribute) @classmethod - def from_dict(cls, spec: Dict): + def from_dict(cls, spec: Dict, allow_underspecified: bool = False): """Initialize a GameSpec from a dictionary. Can be used to directly create a GameSpec from a game registry entry. Args: @@ -76,7 +80,7 @@ def from_dict(cls, spec: Dict): Returns: A GameSpec instance with the data specified by the passed dict. """ - return cls(**spec) + return cls(allow_underspecified, **spec) def matches(self, spec: Dict): """Check if the game features match a given specification. @@ -115,6 +119,21 @@ def game_file_exists(self): """ return True if os.path.isfile(self.get_game_file()) else False + def unify(self, other: "GameSpec") -> "GameSpec": + """Unify two GameSpec instances. + Args: + other: The other GameSpec instance this instance is to be unified with. + Returns: + The GameSpec unification of this GameSpec instance and the passed GameSpec instance. + Raises: + ValueError: A ValueError exception is raised if the passed GameSpec instance does not unify with this + GameSpec instance. + """ + result = nltk.featstruct.unify(self.__dict__, other.__dict__) + if result is None: + raise ValueError(f"{self} does not unify with {other}") + return GameSpec(**result) + def load_custom_game_registry(_game_registry_path: str = None, is_optional=True): """Load a custom game registry. @@ -157,25 +176,79 @@ def load_game_registry(_game_registry_path: str = None, is_mandatory=True): game_registry.append(_game_spec) -def select_game(game_name: str) -> GameSpec: - """Select a GameSpec from the game registry by game name. +def select_game(game: Union[str, Dict, GameSpec]) -> List[GameSpec]: + """Select a list of GameSpecs from the game registry by unifying game spec dict or game name. Args: - game_name: String name of the selected game. 
+ game: String name of the game matching the 'game_name' value of the game registry entry to select, OR a + GameSpec-like dict, OR a GameSpec object. + A passed GameSpec-like dict can EITHER contain the 'benchmark' key with a list of benchmark versions as + value, in which case all games that have matching benchmark version strings in their 'benchmark' key values + are selected, OR contain one or more other GameSpec keys, in which case all games that unify with the given + key values are selected. If the 'benchmark' key is present, only benchmark versions are checked! + For example: {'benchmark':['v2']} will select all games that have 'v2' in their 'benchmark' key value list. + {'main_game': 'wordle'} will select all wordle variants, as their game registry entries have the 'main_game' + key value 'wordle'. Returns: - A GameSpec instance from the game registry corresponding to the passed game_name. + A list of GameSpec instances from the game registry corresponding to the passed game string, dict or GameSpec. Raises: - ValueError: No game specification matching the passed game_name was found in the game registry. + ValueError: No game specification matching the passed game was found in the game registry. """ - # return first entry that matches game_name - for game in game_registry: - if game["game_name"] == game_name: - if game.game_file_exists(): - return game + # check if the passed game is a dict already or a parseable JSON string: + game_is_dict = False + if isinstance(game, dict): + game_is_dict = True + elif isinstance(game, str): + try: + game = json.loads(game.replace("'", '"')) + game_is_dict = True + except Exception: + logger.info(f"Passed game '{game}' does not parse as JSON!") + + # convert passed dict to GameSpec for unification: + game_is_gamespec = False + if game_is_dict: + game = GameSpec.from_dict(game, allow_underspecified=True) + game_is_gamespec = True + elif isinstance(game, GameSpec): + game_is_gamespec = True + + if game_is_gamespec: + matching_registered_games: list = list() + # iterate over game registry: + for registered_game_spec in game_registry: + + if hasattr(game, 'benchmark'): + # passed game spec specifies benchmark version + for benchmark_version in game.benchmark: + if benchmark_version in registered_game_spec.benchmark: + if registered_game_spec.game_file_exists(): + matching_registered_games.append(registered_game_spec) + else: - raise ValueError(f"Game master file master.py not found in {game['game_path']}." - f"Update clemcore/clemgame/game_registry.json (or game_registry_custom.json) with the right path for {game_name}.") - raise ValueError(f"No games found matching the given specification '{game_name}'. " - "Make sure the game name matches the name in clemcore/clemgame/game_registry.json (or game_registry_custom.json)") + # get unifying entries: + unifying_game_spec = None + try: + unifying_game_spec = game.unify(registered_game_spec) + if unifying_game_spec.game_file_exists(): + # print(f"Found unifying game registry entry: {unifying_game_spec}") + matching_registered_games.append(unifying_game_spec) + except ValueError: + continue + + return matching_registered_games + elif game == "all": + return game_registry + else: + # return first entry that matches game_name + for registered_game_spec in game_registry: + if registered_game_spec["game_name"] == game: + if registered_game_spec.game_file_exists(): + return [registered_game_spec] + else: + raise ValueError(f"Game master file master.py not found in {registered_game_spec['game_path']}."
+ f"Update clemcore/clemgame/game_registry.json (or game_registry_custom.json) with the right path for {registered_game_spec}.") + raise ValueError(f"No games found matching the given specification '{registered_game_spec}'. " + "Make sure the game name matches the name in clemcore/clemgame/game_registry.json (or game_registry_custom.json)") + # extension to select subset of games # (postponed because it introduces more complexity # on things like how to specify specific episodes (which could, however be integrated into the game spec @@ -1045,6 +1118,7 @@ def __init__(self, game_spec: GameSpec): game_spec: The name of the game (as specified in game_registry) """ super().__init__(game_spec["game_name"], game_spec["game_path"]) + self.game_spec = game_spec self.instances = None self.filter_experiment: List[str] = [] self.is_single_player = True if game_spec["players"] == "one" else False @@ -1055,7 +1129,12 @@ def setup(self, instances_name: str = None): game_path: Path to the game directory. instances_name: Name of the instances JSON file to be used for the benchmark run. """ - self.instances = self.load_instances(instances_name) + if instances_name: + self.instances = self.load_instances(instances_name) + elif hasattr(self.game_spec, 'instances'): + self.instances = self.load_instances(self.game_spec.instances) + else: + self.instances = self.load_instances("instances") # fallback to instances.json default def build_transcripts(self, results_dir: str): """Create and store readable HTML and LaTeX episode transcripts. diff --git a/clemcore/clemgame/game_registry.json b/clemcore/clemgame/game_registry.json index 94bfcb91c7..fa752f842d 100644 --- a/clemcore/clemgame/game_registry.json +++ b/clemcore/clemgame/game_registry.json @@ -101,9 +101,9 @@ }, { "game_name": "mm_mapworld", - "game_path": "../clemgames/mm_mapworld", + "game_path": "../clemgames/mm_mapworld/mm_mapworld_main", "description": "In this game an agent is placed on a graph and needs to navigate through it by reasoning about past steps taken", - "main_game": "mm_mapworld", + "main_game": "mm_mapworld_main", "players": "two", "image": "single", "languages": ["en"], @@ -111,9 +111,9 @@ }, { "game_name": "mm_mapworld_graphs", - "game_path": "../clemgames/mm_mapworld_graphs", + "game_path": "../clemgames/mm_mapworld/mm_mapworld_graphs", "description": "In this game an agent is placed on a graph and needs to navigate through it by reasoning about past steps taken", - "main_game": "mm_mapworld", + "main_game": "mm_mapworld_main", "players": "two", "image": "single", "languages": ["en"], @@ -121,12 +121,84 @@ }, { "game_name": "mm_mapworld_specificroom", - "game_path": "../clemgames/mm_mapworld_specificroom", + "game_path": "../clemgames/mm_mapworld/mm_mapworld_specificroom", "description": "In this game an agent is placed on a graph and needs to navigate through it by reasoning about past steps taken", - "main_game": "mm_mapworld", + "main_game": "mm_mapworld_main", "players": "two", "image": "single", "languages": ["en"], "benchmark": ["1.6", "1.6.5", "2.0"] + }, + { + "game_name": "wordle", + "game_path": "../clemgames/wordle", + "description": "Wordle 5-letter word guessing game.", + "main_game": "wordle", + "players": "one", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "wordle_withclue", + "game_path": "../clemgames/wordle", + "description": "Wordle 5-letter word guessing game with clue giver.", + "main_game": "wordle", + "instances": "instances_withclue", + "players": "two", + "image": 
"none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "wordle_withcritic", + "game_path": "../clemgames/wordle", + "description": "Wordle 5-letter word guessing game with critic.", + "main_game": "wordle", + "instances": "instances_withcritic", + "players": "two", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "matchit_ascii", + "game_path": "../clemgames/matchit_ascii", + "description": "Matching grids of ASCII characters between two players.", + "main_game": "matchit_ascii", + "players": "two", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "textmapworld", + "game_path": "../clemgames/textmapworld/textmapworld_main", + "description": "Exhaustively exploring a map.", + "main_game": "textmapworld", + "players": "one", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "textmapworld_graphreasoning", + "game_path": "../clemgames/textmapworld/textmapworld_graphreasoning", + "description": "Exhaustively exploring a map with graph reasoning.", + "main_game": "textmapworld", + "players": "one", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] + }, + { + "game_name": "textmapworld_specificroom", + "game_path": "../clemgames/textmapworld/textmapworld_specificroom", + "description": "Exploring a map to find a specific room.", + "main_game": "textmapworld", + "players": "one", + "image": "none", + "languages": ["en"], + "benchmark": ["2.0"] } ] \ No newline at end of file diff --git a/clemcore/cli.py b/clemcore/cli.py index 2ec3814cac..c4e1a4828e 100644 --- a/clemcore/cli.py +++ b/clemcore/cli.py @@ -121,14 +121,14 @@ def main(args: argparse.Namespace): run_parser.add_argument("-e", "--experiment_name", type=str, help="Optional argument to only run a specific experiment") run_parser.add_argument("-g", "--game", type=str, - required=True, help="A specific game name (see ls).") + required=True, help="A specific game name (see ls), or a GameSpec-like JSON string object.") run_parser.add_argument("-t", "--temperature", type=float, default=0.0, help="Argument to specify sampling temperature for the models. Default: 0.0.") run_parser.add_argument("-l", "--max_tokens", type=int, default=100, help="Specify the maximum number of tokens to be generated per turn (except for cohere). " "Be careful with high values which might lead to exceed your API token limits." "Default: 100.") - run_parser.add_argument("-i", "--instances_name", type=str, default="instances", + run_parser.add_argument("-i", "--instances_name", type=str, default=None, help="The instances file name (.json suffix will be added automatically.") run_parser.add_argument("-r", "--results_dir", type=str, default="results", help="A relative or absolute path to the results root directory. " @@ -139,7 +139,8 @@ def main(args: argparse.Namespace): score_parser.add_argument("-e", "--experiment_name", type=str, help="Optional argument to only run a specific experiment") score_parser.add_argument("-g", "--game", type=str, - help="A specific game name (see ls).") + help='A specific game name (see ls), a GameSpec-like JSON string object or "all" (default).', + default="all") score_parser.add_argument("-r", "--results_dir", type=str, default="results", help="A relative or absolute path to the results root directory. " "For example '-r results/v1.5/de‘ or '-r /absolute/path/for/results'. 
" @@ -149,7 +150,8 @@ def main(args: argparse.Namespace): transcribe_parser.add_argument("-e", "--experiment_name", type=str, help="Optional argument to only run a specific experiment") transcribe_parser.add_argument("-g", "--game", type=str, - help="A specific game name (see ls).", default="all") + help='A specific game name (see ls), a GameSpec-like JSON string object or "all" (default).', + default="all") transcribe_parser.add_argument("-r", "--results_dir", type=str, default="results", help="A relative or absolute path to the results root directory. " "For example '-r results/v1.5/de‘ or '-r /absolute/path/for/results'. " diff --git a/docs/howto_update_to_v2.md b/docs/howto_update_to_v2.md index b3b78ce395..73a86baf66 100644 --- a/docs/howto_update_to_v2.md +++ b/docs/howto_update_to_v2.md @@ -18,9 +18,11 @@ To add games that are not (yet) addd to the official collection, create an entry "game_name": "taboo", "game_path": "../clemgames/taboo", # relative to clembench or absolute "description": "Taboo game between two agents where one has to describe a word for the other to guess.", # copied from GameBenchmark get_description() in master.py - "main_game": "taboo", # relevant for games with different versions, otherwise same as game_name + "main_game": "taboo", # relevant for games with different versions, otherwise same as game_name, + "instances": "instances", # OPTIONAL; if this key does not exist, instances.json will be used, if it exists, the instances file with the name given here will be used "players": "two", # [one|multi] "image": "none", # [one|multi] + "benchmark": ["0.9", "1.0", "1.5", "2.0"], # list of benchmark versions this game is part of; can be empty "languages": ["en"], # use ISO- codes for available languages } ] diff --git a/scripts/run_multiling.sh b/scripts/run_multiling.sh new file mode 100755 index 0000000000..0e792a940f --- /dev/null +++ b/scripts/run_multiling.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Usage: scripts/run_multiling.sh + +source prepare_path.sh +mkdir -p logs + +version="v1.6" +games=( +"referencegame" +#"taboo" +#"imagegame" +#"wordle" +#"wordle_withclue" +#"wordle_withcritic" +#"privateshared" +#"codenames" +#"matchit_ascii" +#"guess_what" +#"textmapworld" +) + +languages=("ar" "de" "en" "es" "it" "ja" "pt" "ru" "te" "tk" "tr" "zh") + +models=( +"llama-70b" # = llama-3-70b-instruct +#"mock" +#"claude-3-opus-20240229" +#"gpt-4-turbo-2024-04-09" +#"aya-23-35B" +#"Llama-3-SauerkrautLM-70b-Instruct" +#"Meta-Llama-3.1-70B-Instruct" +#"Mixtral-8x22B-Instruct-v0.1" +#"Qwen1.5-72B-Chat" +) + +echo +echo "===================================================" +echo "RUNNING: Benchmark Run Version ${version}" +echo "===================================================" +echo + +for lang in "${languages[@]}"; do + for game in "${games[@]}"; do + for model in "${models[@]}"; do + echo "Running ${model} on ${game}_${lang}" + { time python3 clemcore/cli.py run -g "${game}" -m "${model}" -i "instances_${version}_${lang}.json" -r "results/${version}/${lang}"; } 2>&1 | tee "logs/run.${game}.${lang}.${model}.log" + done + echo "Transcribing ${game} in ${lang}" + { time python3 clemcore/cli.py transcribe -g "${game}" -r "results/${version}/${lang}"; } 2>&1 | tee "logs/transcribe.${game}.${lang}.log" + echo "Scoring ${game} in ${lang}" + { time python3 clemcore/cli.py score -g "${game}" -r "results/${version}/${lang}"; } 2>&1 | tee "logs/score.${game}.${lang}.log" + done + echo "Evaluating all models across all games in ${lang}" +{ time python3 evaluation/bencheval.py -p 
"results/${version}/${lang}"; } +done + +echo "===================================================" +echo "FINISHED: Benchmark Run Version ${version}" +echo "==================================================="