# TODO: GB size, params. In the future, would we make recommendations on hardware? Test a bunch of quants?
# TODO: support GGUF properly by downloading the appropriate runtimes
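# Schema summary (as used below): benchmarks.<modality>.models lists model entries
# (name, type, runtime, quant, url, plus runtime-specific fields such as
# prompt_template/context/stop for llamafile or steps/scheduler/cfg_scale for comfy),
# benchmarks.<modality>.datasets lists the inputs each modality is tested against,
# and the top-level runtimes list pins runtime versions and download urls.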
benchmarks:
# speaking:
# models:
# - name: piper
# type: speaking
# runtime: docker
# quant: fp32
# url: cjpais/piper-http
creation:
models:
- name: sdxl-base-1.0
url: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors
type: creation
runtime: comfy
# workflow_url:
quant: fp16
steps: 20
scheduler: "normal"
cfg_scale: 8
variants:
- resolution: 512
- resolution: 1024
- name: sdxl-lightning-4step
url: https://huggingface.co/ByteDance/SDXL-Lightning/resolve/main/sdxl_lightning_4step.safetensors
type: creation
runtime: comfy
# workflow_url:
quant: fp16
steps: 4
scheduler: "sgm_uniform"
cfg_scale: 1.0
variants:
- resolution: 512
- resolution: 1024
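      # Note: the sdxl-lightning-4step settings (steps: 4, cfg_scale: 1.0,
      # scheduler: sgm_uniform) follow ByteDance's guidance for the 4-step
      # distilled checkpoint; raising the step count or CFG tends to hurt its output.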
datasets:
- name: androart
type: creation
url: "https://datasets.andromeda.computer/creation/prompts.json"
source: "andromeda"
hearing:
models:
- name: whisper-tiny
type: hearing
runtime: whisperfile
quant: Q8_0
url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.tiny.q8.bin
# - name: whisper-base
# type: hearing
# runtime: whisperfile
# quant: Q8_0
# url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.base.q8.bin
# - name: whisper-small
# type: hearing
# runtime: whisperfile
# quant: Q8_0
# url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.small.q8.bin
# - name: whisper-medium
# type: hearing
# runtime: whisperfile
# quant: Q8_0
# url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.medium.q8.bin
- name: whisper-large-v3-turbo
type: hearing
runtime: whisperfile
quant: Q8_0
url: https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q8_0.bin
- name: whisper-large-v3
type: hearing
runtime: whisperfile
quant: Q8_0
url: https://huggingface.co/cjpais/whisperfile/resolve/main/whisper.large-v3.q8.bin
datasets:
- name: androspeech
type: file
url: "https://datasets.andromeda.computer/audio/androspeech"
source: "andromeda"
- name: voxpopuli-en
type: file
url: "https://datasets.andromeda.computer/audio/voxpopuli"
source: "andromeda"
vision:
models:
- name: moondream2
type: vision
runtime: llamafile
quant: Q8_0
context: 2048
prompt_template: "\n\nQuestion:[img-10]{{ prompt }}\n\nAnswer:"
stop: "<|endoftext|>"
url: https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-050824-q8.gguf
projector_url: https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-mmproj-050824-f16.gguf
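        # projector_url is the multimodal projector (mmproj) weights that map image
        # features into the language model's embedding space; llamafile loads it
        # alongside the main GGUF for vision models.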
datasets:
- name: VibeEval
type: file
url: "https://datasets-server.huggingface.co/rows?dataset=RekaAI%2FVibeEval&config=default&split=test&offset=0&length=100"
source: "hf-api"
key: "media_url"
    # TODO: add LLaVA 1.6 and test it as well; a hedged sketch follows below
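    # Sketch of a possible entry for the TODO above (untested; the quant, URLs,
    # and prompt_template are assumptions to verify before enabling):
    # models:
    #   - name: llava-1.6-mistral-7b
    #     type: vision
    #     runtime: llamafile
    #     quant: Q5_K_M
    #     context: 4096
    #     prompt_template: "[INST] [img-10]{{ prompt }} [/INST]"
    #     stop: "</s>"
    #     url: https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q5_K_M.gguf
    #     projector_url: https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf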
language:
models:
- name: tinyllama-chat
type: language
runtime: llamafile
        # TODO: can we just use the chat template embedded in the GGUF itself via the OpenAI chat completion endpoint? (see note after this entry)
prompt_template: "<|im_start|>system\nYou are a helpful AI assistant.<|im_end|>\n<|im_start|>user\n{{ prompt }}<|im_end|>\n<|im_start|>assistant"
quant: Q5_K_M
context: 2048
stop: "</s>"
params: 1.1B
url: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf
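        # Note on the TODO above: llama.cpp's OpenAI-compatible /v1/chat/completions
        # endpoint can apply the chat template stored in the GGUF metadata
        # (tokenizer.chat_template); if llamafile exposes that endpoint for these
        # models, the hand-written prompt_template fields could likely go away.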
- name: phi-3-mini-4k-instruct
type: language
runtime: llamafile
prompt_template: "<|system|>\nYou are a helpful AI assistant.<|end|>\n<|user|>\n{{ prompt }}<|end|>\n<|assistant|>"
quant: Q5_K_M
params: 3.8B
stop: "<|end|>"
context: 4096
url: https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q5_K_M.gguf
- name: llama3-8B-instruct
type: language
runtime: llamafile
prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful AI assistant.<|eot_id|><|start_header_id|>User<|end_header_id|>\n{{ prompt }}<|eot_id|><|start_header_id|>Llama<|end_header_id|>"
quant: Q5_K_M
params: 8B
context: 8192
stop: "<|eot_id|>"
url: https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf
# - name: phi3-medium
# type: language
# runtime: llamafile
# prompt_template: "<|user|>\n{{ prompt }} <|end|>\n<|assistant|>"
# quant: Q5_K_M
# params: 14B
# context: 131072
# stop: "<|end|>"
# url: https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q5_K_M.gguf
# - name: codestral-22B
# type: language
# runtime: llamafile
# prompt_template: "<s> [INST] <<SYS>>\nYou are a helpful AI assistant.\n <</SYS>>\n\n {{ prompt }} [/INST] </s>"
# quant: Q5_K_M
# params: 22B
# context: 32768
# stop: "</s>"
# url: https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q5_K_M.gguf
# - name: gemma2-27B
# type: language
# runtime: llamafile
# prompt_template: "<start_of_turn>user\n{{ prompt }}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n<start_of_turn>model"
# quant: Q5_K_M
# params: 27B
# context: 8192
# stop: "<end_of_turn>"
# url: https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q5_K_M.gguf
# - name: mixtral-8x7B
# type: language
# runtime: llamafile
# prompt_template: "[INST] {{prompt}} [/INST]"
# quant: Q5_K_M
# params: 8x7B
# context: 32768
# stop: "<|end|>"
# url: https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.gguf
# - name: llama3-70B-instruct
# type: language
# runtime: llamafile
# prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful AI assistant.<|eot_id|><|start_header_id|>User<|end_header_id|>\n{{ prompt }}<|eot_id|><|start_header_id|>Llama<|end_header_id|>"
# quant: Q5_K_M
# params: 70B
# context: 8192
# stop: "<|eot_id|>"
# url: https://huggingface.co/bartowski/Meta-Llama-3-70B-Instruct-GGUF/resolve/main/Meta-Llama-3-70B-Instruct-Q5_K_M.gguf
datasets:
- name: OpenOrcaShortQuestion
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=0&length=16&where=%22question.length%22%3E%3D12+and+%22question.length%22%3C384"
source: "hf-api"
key: "question"
- name: OpenOrcaMediumQuestion
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=0&length=16&where=%22question.length%22%3E%3D384+and+%22question.length%22%3C1536"
source: "hf-api"
key: "question"
- name: OpenOrcaLongQuestion
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=0&length=16&where=%22question.length%22%3E%3D1536+and+%22question.length%22%3C3072"
source: "hf-api"
key: "question"
- name: OpenOrcaShortResponse
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=200&length=16&where=%22response.length%22%3E%3D12+and+%22response.length%22%3C384"
source: "hf-api"
key: "question"
- name: OpenOrcaMediumResponse
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=200&length=16&where=%22response.length%22%3E%3D384+and+%22response.length%22%3C1536"
source: "hf-api"
key: "question"
- name: OpenOrcaLongResponse
type: prompt
url: "https://datasets-server.huggingface.co/filter?dataset=Open-Orca%2FOpenOrca&config=default&split=train&offset=200&length=16&where=%22response.length%22%3E%3D1536+and+%22response.length%22%3C3072"
source: "hf-api"
key: "question"
runtimes:
- name: llamafile
url: https://datasets.andromeda.computer/llamafile-0.8.13-accel
version: 0.8.13
- name: whisperfile
url: https://datasets.andromeda.computer/whisperfile-0.8.13-accel
version: 0.8.13
- name: docker
- name: comfy
version: 97ae6ef
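# Note: runtimes without a url (docker, comfy) are presumably resolved locally
# rather than downloaded; the comfy version (97ae6ef) appears to be a pinned git
# commit of ComfyUI rather than a release tag (assumption).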