# TODO: GB size, params. In the future, would we make recommendations on hardware? Test a bunch of quants?
# TODO: support GGUF properly by downloading the appropriate runtimes
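# Schema summary (as used below): benchmarks.<modality>.models lists model entries
# (name, type, runtime, quant, url, plus runtime-specific fields such as
# prompt_template/context/stop for llamafile or steps/scheduler/cfg_scale for comfy),
# benchmarks.<modality>.datasets lists the inputs each modality is tested against,
# and the top-level runtimes list pins runtime versions and download urls.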
benchmarks:
# speaking:
# models:
# - name: piper
# type: speaking
# runtime: docker
# quant: fp32
# url: cjpais/piper-http
creation:
models:
- name: sdxl-base-1.0
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors
type: creation
runtime: comfy
# workflow_url:
quant: fp16
steps: 20
scheduler: "normal"
cfg_scale: 8
variants:
- resolution: 512
- resolution: 1024
- name: sdxl-lightning-4step
url: https://huggingface.co/ByteDance/SDXL-Lightning/resolve/main/sdxl_lightning_4step.safetensors
type: creation
runtime: comfy
# workflow_url:
quant: fp16
steps: 4
scheduler: "sgm_uniform"
cfg_scale: 1.0
variants:
- resolution: 512
- resolution: 1024
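      # Note: the sdxl-lightning-4step settings (steps: 4, cfg_scale: 1.0,
      # scheduler: sgm_uniform) follow ByteDance's guidance for the 4-step
      # distilled checkpoint; raising the step count or CFG tends to hurt its output.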
datasets:
- name: androart
type: creation
url: "https://datasets.andromeda.computer/creation/prompts.json"
source: "andromeda"
hearing:
models:
- name: whisper-tiny
type: hearing
runtime: whisperfile
quant: Q8_0
url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.tiny.q8.bin
# - name: whisper-base
# type: hearing
# runtime: whisperfile
# quant: Q8_0
# url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.base.q8.bin
# - name: whisper-small
# type: hearing
# runtime: whisperfile
# quant: Q8_0
# url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.small.q8.bin
# - name: whisper-medium
# type: hearing
# runtime: whisperfile
# quant: Q8_0
# url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.medium.q8.bin
- name: whisper-large-v3-turbo
type: hearing
runtime: whisperfile
quant: Q8_0
url: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q8_0.bin
- name: whisper-large-v3
type: hearing
runtime: whisperfile
quant: Q8_0
url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.large-v3.q8.bin
datasets:
- name: androspeech
type: file
url: "https://datasets.andromeda.computer/audio/androspeech"
source: "andromeda"
- name: voxpopuli-en
type: file
url: "https://datasets.andromeda.computer/audio/voxpopuli"
source: "andromeda"
vision:
models:
- name: moondream2
type: vision
runtime: llamafile
quant: Q8_0
context: 2048
prompt_template: "\n\nQuestion:[img-10]{{ prompt }}\n\nAnswer:"
stop: "<|endoftext|>"
url: https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-050824-q8.gguf
projector_url: https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-mmproj-050824-f16.gguf
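        # projector_url is the multimodal projector (mmproj) weights that map image
        # features into the language model's embedding space; llamafile loads it
        # alongside the main GGUF for vision models.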
datasets:
- name: VibeEval
type: file
url: "https://datasets-server.huggingface.co/rows?dataset=RekaAI%2FVibeEval&config=default&split=test&offset=0&length=100"
source: "hf-api"
key: "media_url"
    # TODO: add LLaVA 1.6 and test it as well; a hedged sketch follows below
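    # Sketch of a possible entry for the TODO above (untested; the quant, URLs,
    # and prompt_template are assumptions to verify before enabling):
    # models:
    #   - name: llava-1.6-mistral-7b
    #     type: vision
    #     runtime: llamafile
    #     quant: Q5_K_M
    #     context: 4096
    #     prompt_template: "[INST] [img-10]{{ prompt }} [/INST]"
    #     stop: "</s>"
    #     url: https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q5_K_M.gguf
    #     projector_url: https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf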
language:
models:
- name: tinyllama-chat
type: language
runtime: llamafile
        # TODO: can we just use the chat template embedded in the GGUF itself via the OpenAI chat completion endpoint? (see note after this entry)
prompt_template: "<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n<|im_start|>user\n{{ prompt }}<|im_end|>\n<|im_start|>assistant"
quant: Q5_K_M
context: 2048
stop: "</s>"
params: 1.1B
url: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf
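        # Note on the TODO above: llama.cpp's OpenAI-compatible /v1/chat/completions
        # endpoint can apply the chat template stored in the GGUF metadata
        # (tokenizer.chat_template); if llamafile exposes that endpoint for these
        # models, the hand-written prompt_template fields could likely go away.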
- name: phi-3-mini-4k-instruct
type: language
runtime: llamafile
prompt_template: "<|system|>\nYou are a helpful AI assistant.<|end|>\n<|user|>\n{{ prompt }}<|end|>\n<|assistant|>"
quant: Q5_K_M
params: 3.8B
stop: "<|end|>"
context: 4096
url: https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q5_K_M.gguf
- name: llama3-8B-instruct
type: language
runtime: llamafile
prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful AI assistant.<|eot_id|><|start_header_id|>User<|end_header_id|>\n{{ prompt }}<|eot_id|><|start_header_id|>Llama<|end_header_id|>"
quant: Q5_K_M
params: 8B
context: 8192
stop: "<|eot_id|>"
url: https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
# - name: phi3-medium
# type: language
# runtime: llamafile
# prompt_template: "<|user|>\n{{ prompt }} <|end|>\n<|assistant|>"
# quant: Q5_K_M
# params: 14B
# context: 131072
# stop: "<|end|>"
# url: https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q5_K_M.gguf
# - name: codestral-22B
# type: language
# runtime: llamafile
# prompt_template: "<s> [INST] <<SYS>>\nYou are a helpful AI assistant.\n <</SYS>>\n\n {{ prompt }} [/INST] </s>"
# quant: Q5_K_M
# params: 22B
# context: 32768
# stop: "</s>"
# url: https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q5_K_M.gguf
# - name: gemma2-27B
# type: language
# runtime: llamafile
# prompt_template: "<start_of_turn>user\n{{ prompt }}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model"
# quant: Q5_K_M
# params: 27B
# context: 8192
# stop: "<end_of_turn>"
# url: https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q5_K_M.gguf
# - name: mixtral-8x7B
# type: language
# runtime: llamafile
# prompt_template: "[INST] {{prompt}} [/INST]"
# quant: Q5_K_M
# params: 8x7B
# context: 32768
# stop: "<|end|>"
# url: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf
# - name: llama3-70B-instruct
# type: language
# runtime: llamafile
# prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful AI assistant.<|eot_id|><|start_header_id|>User<|end_header_id|>\n{{ prompt }}<|eot_id|><|start_header_id|>Llama<|end_header_id|>"
# quant: Q5_K_M
# params: 70B
# context: 8192
# stop: "<|eot_id|>"
# url: https://huggingface.co/bartowski/Meta-Llama-3-70B-Instruct-GGUF/resolve/main/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf
datasets:
- name: OpenOrcaShortQuestion
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=0&length=16&where=%22question.length%22%3E%3D12+and+%22question.length%22%3C384"
source: "hf-api"
key: "question"
- name: OpenOrcaMediumQuestion
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=0&length=16&where=%22question.length%22%3E%3D384+and+%22question.length%22%3C1536"
source: "hf-api"
key: "question"
- name: OpenOrcaLongQuestion
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=0&length=16&where=%22question.length%22%3E%3D1536+and+%22question.length%22%3C3072"
source: "hf-api"
key: "question"
- name: OpenOrcaShortResponse
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=200&length=16&where=%22response.length%22%3E%3D12+and+%22response.length%22%3C384"
source: "hf-api"
key: "question"
- name: OpenOrcaMediumResponse
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=200&length=16&where=%22response.length%22%3E%3D384+and+%22response.length%22%3C1536"
source: "hf-api"
key: "question"
- name: OpenOrcaLongResponse
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=200&length=16&where=%22response.length%22%3E%3D1536+and+%22response.length%22%3C3072"
source: "hf-api"
key: "question"
runtimes:
- name: llamafile
url: https://datasets.andromeda.computer/llamafile-0.8.13-accel
version: 0.8.13
- name: whisperfile
url: https://datasets.andromeda.computer/whisperfile-0.8.13-accel
version: 0.8.13
- name: docker
- name: comfy
version: 97ae6ef
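# Note: runtimes without a url (docker, comfy) are presumably resolved locally
# rather than downloaded; the comfy version (97ae6ef) appears to be a pinned git
# commit of ComfyUI rather than a release tag (assumption).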