Skip to content

Commit

Permalink
Moves to query language to simplify the whole thing a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
Kakulukian committed Jun 18, 2024
1 parent 33e8d6b commit 6ad0e97
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 83 deletions.
16 changes: 7 additions & 9 deletions packages/tasks/src/model-libraries-downloads.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
* Read this doc about download stats on the Hub:
*
* https://huggingface.co/docs/hub/models-download-stats
*
* Available fields:
* - path: the complete path of the model
* - path_prefix: the prefix of the path of the model
* - path_extension: the extension of the path of the model
* - path_filename: the filename of the path of the model
* see also:
* https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html
* https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
*/

export type ElasticBoolQueryFilter =
// match a single filename
| { term?: { path: string } }
// match multiple possible filenames
| { terms?: { path: string[] } }
// match a wildcard
| { wildcard?: { path: string } };
export type ElasticSearchQuery = string;
108 changes: 34 additions & 74 deletions packages/tasks/src/model-libraries.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as snippets from "./model-libraries-snippets";
import type { ModelData } from "./model-data";
import type { ElasticBoolQueryFilter } from "./model-libraries-downloads";
import type { ElasticSearchQuery } from "./model-libraries-downloads";

/**
* Elements configurable by a model library.
Expand Down Expand Up @@ -34,7 +34,7 @@ export interface LibraryUiElement {
* By default, those files are counted:
* "config.json", "config.yaml", "hyperparams.yaml", "meta.yaml"
*/
countDownloads?: ElasticBoolQueryFilter;
countDownloads?: ElasticSearchQuery;
/**
* should we display this library in hf.co/models filter
* (only for popular libraries with > 100 models)
Expand Down Expand Up @@ -65,9 +65,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/adapters",
snippets: snippets.adapters,
filter: true,
countDownloads: {
term: { path: "adapter_config.json" },
},
countDownloads: `path:"adapter_config.json"`,
},
allennlp: {
prettyLabel: "AllenNLP",
Expand All @@ -84,9 +82,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/asteroid",
snippets: snippets.asteroid,
filter: true,
countDownloads: {
term: { path: "pytorch_model.bin" },
},
countDownloads: `path:"pytorch_model.bin"`,
},
audiocraft: {
prettyLabel: "Audiocraft",
Expand All @@ -107,17 +103,15 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoName: "big_vision",
repoUrl: "https://github.com/google-research/big_vision",
filter: false,
countDownloads: {
wildcard: { path: "*.npz" },
},
countDownloads: `path_extension:"npz"`,
},
chat_tts: {
prettyLabel: "ChatTTS",
repoName: "ChatTTS",
repoUrl: "https://github.com/2noise/ChatTTS.git",
filter: false,
countDownloads: { term: { path: "asset/GPT.pt" } },
snippets: snippets.chattts,
filter: false,
countDownloads: `path:"asset/GPT.pt"`,
},
diffusers: {
prettyLabel: "Diffusers",
Expand All @@ -140,18 +134,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://aphp.github.io/edsnlp/latest/",
filter: false,
snippets: snippets.edsnlp,
countDownloads: {
wildcard: { path: "*/config.cfg" },
},
countDownloads: `path_filename:"config.cfg" AND path_prefix:*`,
},
elm: {
prettyLabel: "ELM",
repoName: "elm",
repoUrl: "https://github.com/slicex-ai/elm",
filter: false,
countDownloads: {
wildcard: { path: "*/slicex_elm_config.json" },
},
countDownloads: `path_filename:"slicex_elm_config.json" AND path_prefix:*`,
},
espnet: {
prettyLabel: "ESPnet",
Expand Down Expand Up @@ -182,9 +172,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoUrl: "https://fasttext.cc/",
snippets: snippets.fasttext,
filter: true,
countDownloads: {
wildcard: { path: "*.bin" },
},
countDownloads: `path_extension:"bin"`,
},
flair: {
prettyLabel: "Flair",
Expand All @@ -193,35 +181,29 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/flair",
snippets: snippets.flair,
filter: true,
countDownloads: {
term: { path: "pytorch_model.bin" },
},
countDownloads: `path:"pytorch_model.bin"`,
},
"gemma.cpp": {
prettyLabel: "gemma.cpp",
repoName: "gemma.cpp",
repoUrl: "https://github.com/google/gemma.cpp",
filter: false,
countDownloads: { wildcard: { path: "*.sbs" } },
countDownloads: `path_extension:"sbs"`,
},
gliner: {
prettyLabel: "GLiNER",
repoName: "GLiNER",
repoUrl: "https://github.com/urchade/GLiNER",
snippets: snippets.gliner,
filter: false,
countDownloads: {
term: { path: "gliner_config.json" },
},
countDownloads: `path:"gliner_config.json"`,
},
grok: {
prettyLabel: "Grok",
repoName: "Grok",
repoUrl: "https://github.com/xai-org/grok-1",
filter: false,
countDownloads: {
terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] },
},
countDownloads: `path:"ckpt/tensor00000_000" OR path:"ckpt-0/tensor00000_000"`,
},
keras: {
prettyLabel: "Keras",
Expand All @@ -230,7 +212,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/keras",
snippets: snippets.keras,
filter: true,
countDownloads: { term: { path: "saved_model.pb" } },
countDownloads: `path:"saved_model.pb"`,
},
"keras-nlp": {
prettyLabel: "KerasNLP",
Expand All @@ -256,7 +238,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/ml-agents",
snippets: snippets.mlAgents,
filter: true,
countDownloads: { wildcard: { path: "*.onnx" } },
countDownloads: `path_extension:"onnx"`,
},
mlx: {
prettyLabel: "MLX",
Expand All @@ -272,23 +254,23 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/mlx-image",
snippets: snippets.mlxim,
filter: false,
countDownloads: { term: { path: "model.safetensors" } },
countDownloads: `path:"model.safetensors"`,
},
nemo: {
prettyLabel: "NeMo",
repoName: "NeMo",
repoUrl: "https://github.com/NVIDIA/NeMo",
snippets: snippets.nemo,
filter: true,
countDownloads: { wildcard: { path: "*.nemo" } },
countDownloads: `path_extension:"nemo"`,
},
open_clip: {
prettyLabel: "OpenCLIP",
repoName: "OpenCLIP",
repoUrl: "https://github.com/mlfoundations/open_clip",
snippets: snippets.open_clip,
filter: true,
countDownloads: { wildcard: { path: "*pytorch_model.bin" } },
countDownloads: `path_extension:"bin" AND path_filename:*pytorch_model`,
},
paddlenlp: {
prettyLabel: "paddlenlp",
Expand All @@ -297,19 +279,15 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/paddlenlp",
snippets: snippets.paddlenlp,
filter: true,
countDownloads: {
term: { path: "model_config.json" },
},
countDownloads: `path:"model_config.json"`,
},
peft: {
prettyLabel: "PEFT",
repoName: "PEFT",
repoUrl: "https://github.com/huggingface/peft",
snippets: snippets.peft,
filter: true,
countDownloads: {
term: { path: "adapter_config.json" },
},
countDownloads: `path:"adapter_config.json"`,
},
"pyannote-audio": {
prettyLabel: "pyannote.audio",
Expand All @@ -330,7 +308,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoName: "recurrentgemma",
repoUrl: "https://github.com/google-deepmind/recurrentgemma",
filter: false,
countDownloads: { term: { path: "tokenizer.model" } },
countDownloads: `path:"tokenizer.model"`,
},
"sample-factory": {
prettyLabel: "sample-factory",
Expand All @@ -339,7 +317,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/sample-factory",
snippets: snippets.sampleFactory,
filter: true,
countDownloads: { term: { path: "cfg.json" } },
countDownloads: `path:"cfg.json"`,
},
"sentence-transformers": {
prettyLabel: "sentence-transformers",
Expand All @@ -363,9 +341,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoUrl: "https://github.com/scikit-learn/scikit-learn",
snippets: snippets.sklearn,
filter: true,
countDownloads: {
term: { path: "sklearn_model.joblib" },
},
countDownloads: `path:"sklearn_model.joblib"`,
},
spacy: {
prettyLabel: "spaCy",
Expand All @@ -374,9 +350,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/spacy",
snippets: snippets.spacy,
filter: true,
countDownloads: {
wildcard: { path: "*.whl" },
},
countDownloads: `path_extension:"whl"`,
},
"span-marker": {
prettyLabel: "SpanMarker",
Expand All @@ -393,16 +367,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/speechbrain",
snippets: snippets.speechbrain,
filter: true,
countDownloads: {
term: { path: "hyperparams.yaml" },
},
countDownloads: `path:"hyperparams.yaml"`,
},
"stable-audio-tools": {
prettyLabel: "Stable Audio Tools",
repoName: "stable-audio-tools",
repoUrl: "https://github.com/Stability-AI/stable-audio-tools.git",
filter: false,
countDownloads: { term: { path: "model.safetensors" } },
countDownloads: `path:"model.safetensors"`,
snippets: snippets.stable_audio_tools,
},
"stable-baselines3": {
Expand All @@ -412,9 +384,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/stable-baselines3",
snippets: snippets.stableBaselines3,
filter: true,
countDownloads: {
wildcard: { path: "*.zip" },
},
countDownloads: `path_extension:"zip"`,
},
stanza: {
prettyLabel: "Stanza",
Expand All @@ -423,9 +393,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/stanza",
snippets: snippets.stanza,
filter: true,
countDownloads: {
term: { path: "models/default.zip" },
},
countDownloads: `path:"models/default.zip"`,
},
tensorflowtts: {
prettyLabel: "TensorFlowTTS",
Expand All @@ -438,16 +406,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoName: "TiC-CLIP",
repoUrl: "https://github.com/apple/ml-tic-clip",
filter: false,
countDownloads: { wildcard: { path: "checkpoints/*.pt" } },
countDownloads: `path_extension:"pt" AND path_prefix:"checkpoints"`,
},
timesfm: {
prettyLabel: "TimesFM",
repoName: "timesfm",
repoUrl: "https://github.com/google-research/timesfm",
filter: false,
countDownloads: {
term: { path: "checkpoints/checkpoint_1100000/state/checkpoint" },
},
countDownloads: `path:"checkpoints/checkpoint_1100000/state/checkpoint"`,
},
timm: {
prettyLabel: "timm",
Expand All @@ -456,9 +422,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
docsUrl: "https://huggingface.co/docs/hub/timm",
snippets: snippets.timm,
filter: true,
countDownloads: {
terms: { path: ["pytorch_model.bin", "model.safetensors"] },
},
countDownloads: `path:"pytorch_model.bin" OR path:"model.safetensors"`,
},
transformers: {
prettyLabel: "Transformers",
Expand All @@ -482,9 +446,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
snippets: snippets.sentis,
filter: true,
countDownloads: {
wildcard: { path: "*.sentis" },
},
countDownloads: `path_extension:"sentis"`,
},
voicecraft: {
prettyLabel: "VoiceCraft",
Expand All @@ -497,9 +459,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
prettyLabel: "WhisperKit",
repoName: "WhisperKit",
repoUrl: "https://github.com/argmaxinc/WhisperKit",
countDownloads: {
wildcard: { path: "*/model.mil" },
},
countDownloads: `path_filename:"model.mil" AND path_prefix:*`,
},
} satisfies Record<string, LibraryUiElement>;

Expand Down

0 comments on commit 6ad0e97

Please sign in to comment.