diff --git a/packages/tasks/src/model-libraries-downloads.ts b/packages/tasks/src/model-libraries-downloads.ts index 2a68b8539..5273f0db7 100644 --- a/packages/tasks/src/model-libraries-downloads.ts +++ b/packages/tasks/src/model-libraries-downloads.ts @@ -6,15 +6,13 @@ * Read this doc about download stats on the Hub: * * https://huggingface.co/docs/hub/models-download-stats - * + * Available fields: + * - path: the complete path of the model + * - path_prefix: the prefix of the path of the model + * - path_extension: the extension of the path of the model + * - path_filename: the filename of the path of the model * see also: - * https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html + * https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html */ -export type ElasticBoolQueryFilter = - // match a single filename - | { term?: { path: string } } - // match multiple possible filenames - | { terms?: { path: string[] } } - // match a wildcard - | { wildcard?: { path: string } }; +export type ElasticSearchQuery = string; diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index 51cd69c42..b27215590 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -1,6 +1,6 @@ import * as snippets from "./model-libraries-snippets"; import type { ModelData } from "./model-data"; -import type { ElasticBoolQueryFilter } from "./model-libraries-downloads"; +import type { ElasticSearchQuery } from "./model-libraries-downloads"; /** * Elements configurable by a model library. 
@@ -34,7 +34,7 @@ export interface LibraryUiElement { * By default, those files are counted: * "config.json", "config.yaml", "hyperparams.yaml", "meta.yaml" */ - countDownloads?: ElasticBoolQueryFilter; + countDownloads?: ElasticSearchQuery; /** * should we display this library in hf.co/models filter * (only for popular libraries with > 100 models) @@ -65,9 +65,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/adapters", snippets: snippets.adapters, filter: true, - countDownloads: { - term: { path: "adapter_config.json" }, - }, + countDownloads: `path:"adapter_config.json"`, }, allennlp: { prettyLabel: "AllenNLP", @@ -84,9 +82,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/asteroid", snippets: snippets.asteroid, filter: true, - countDownloads: { - term: { path: "pytorch_model.bin" }, - }, + countDownloads: `path:"pytorch_model.bin"`, }, audiocraft: { prettyLabel: "Audiocraft", @@ -107,17 +103,15 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoName: "big_vision", repoUrl: "https://github.com/google-research/big_vision", filter: false, - countDownloads: { - wildcard: { path: "*.npz" }, - }, + countDownloads: `path_extension:"npz"`, }, chat_tts: { prettyLabel: "ChatTTS", repoName: "ChatTTS", repoUrl: "https://github.com/2noise/ChatTTS.git", - filter: false, - countDownloads: { term: { path: "asset/GPT.pt" } }, snippets: snippets.chattts, + filter: false, + countDownloads: `path:"asset/GPT.pt"`, }, diffusers: { prettyLabel: "Diffusers", @@ -140,18 +134,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://aphp.github.io/edsnlp/latest/", filter: false, snippets: snippets.edsnlp, - countDownloads: { - wildcard: { path: "*/config.cfg" }, - }, + countDownloads: `path_filename:"config.cfg" AND path_prefix:*`, }, elm: { prettyLabel: "ELM", repoName: "elm", repoUrl: "https://github.com/slicex-ai/elm", filter: false, - countDownloads: { - wildcard: { path: 
"*/slicex_elm_config.json" }, - }, + countDownloads: `path_filename:"slicex_elm_config.json" AND path_prefix:*`, }, espnet: { prettyLabel: "ESPnet", @@ -182,9 +172,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://fasttext.cc/", snippets: snippets.fasttext, filter: true, - countDownloads: { - wildcard: { path: "*.bin" }, - }, + countDownloads: `path_extension:"bin"`, }, flair: { prettyLabel: "Flair", @@ -193,16 +181,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/flair", snippets: snippets.flair, filter: true, - countDownloads: { - term: { path: "pytorch_model.bin" }, - }, + countDownloads: `path:"pytorch_model.bin"`, }, "gemma.cpp": { prettyLabel: "gemma.cpp", repoName: "gemma.cpp", repoUrl: "https://github.com/google/gemma.cpp", filter: false, - countDownloads: { wildcard: { path: "*.sbs" } }, + countDownloads: `path_extension:"sbs"`, }, gliner: { prettyLabel: "GLiNER", @@ -210,18 +196,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://github.com/urchade/GLiNER", snippets: snippets.gliner, filter: false, - countDownloads: { - term: { path: "gliner_config.json" }, - }, + countDownloads: `path:"gliner_config.json"`, }, grok: { prettyLabel: "Grok", repoName: "Grok", repoUrl: "https://github.com/xai-org/grok-1", filter: false, - countDownloads: { - terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] }, - }, + countDownloads: `path:"ckpt/tensor00000_000" OR path:"ckpt-0/tensor00000_000"`, }, keras: { prettyLabel: "Keras", @@ -230,7 +212,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/keras", snippets: snippets.keras, filter: true, - countDownloads: { term: { path: "saved_model.pb" } }, + countDownloads: `path:"saved_model.pb"`, }, "keras-nlp": { prettyLabel: "KerasNLP", @@ -256,7 +238,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/ml-agents", snippets: snippets.mlAgents, filter: true, - 
countDownloads: { wildcard: { path: "*.onnx" } }, + countDownloads: `path_extension:"onnx"`, }, mlx: { prettyLabel: "MLX", @@ -272,7 +254,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/mlx-image", snippets: snippets.mlxim, filter: false, - countDownloads: { term: { path: "model.safetensors" } }, + countDownloads: `path:"model.safetensors"`, }, nemo: { prettyLabel: "NeMo", @@ -280,7 +262,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://github.com/NVIDIA/NeMo", snippets: snippets.nemo, filter: true, - countDownloads: { wildcard: { path: "*.nemo" } }, + countDownloads: `path_extension:"nemo"`, }, open_clip: { prettyLabel: "OpenCLIP", @@ -288,7 +270,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://github.com/mlfoundations/open_clip", snippets: snippets.open_clip, filter: true, - countDownloads: { wildcard: { path: "*pytorch_model.bin" } }, + countDownloads: `path_extension:"bin" AND path_filename:*pytorch_model`, }, paddlenlp: { prettyLabel: "paddlenlp", @@ -297,9 +279,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/paddlenlp", snippets: snippets.paddlenlp, filter: true, - countDownloads: { - term: { path: "model_config.json" }, - }, + countDownloads: `path:"model_config.json"`, }, peft: { prettyLabel: "PEFT", @@ -307,9 +287,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://github.com/huggingface/peft", snippets: snippets.peft, filter: true, - countDownloads: { - term: { path: "adapter_config.json" }, - }, + countDownloads: `path:"adapter_config.json"`, }, "pyannote-audio": { prettyLabel: "pyannote.audio", @@ -330,7 +308,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoName: "recurrentgemma", repoUrl: "https://github.com/google-deepmind/recurrentgemma", filter: false, - countDownloads: { term: { path: "tokenizer.model" } }, + countDownloads: `path:"tokenizer.model"`, }, "sample-factory": { prettyLabel: "sample-factory", 
@@ -339,7 +317,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/sample-factory", snippets: snippets.sampleFactory, filter: true, - countDownloads: { term: { path: "cfg.json" } }, + countDownloads: `path:"cfg.json"`, }, "sentence-transformers": { prettyLabel: "sentence-transformers", @@ -363,9 +341,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://github.com/scikit-learn/scikit-learn", snippets: snippets.sklearn, filter: true, - countDownloads: { - term: { path: "sklearn_model.joblib" }, - }, + countDownloads: `path:"sklearn_model.joblib"`, }, spacy: { prettyLabel: "spaCy", @@ -374,9 +350,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/spacy", snippets: snippets.spacy, filter: true, - countDownloads: { - wildcard: { path: "*.whl" }, - }, + countDownloads: `path_extension:"whl"`, }, "span-marker": { prettyLabel: "SpanMarker", @@ -393,16 +367,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/speechbrain", snippets: snippets.speechbrain, filter: true, - countDownloads: { - term: { path: "hyperparams.yaml" }, - }, + countDownloads: `path:"hyperparams.yaml"`, }, "stable-audio-tools": { prettyLabel: "Stable Audio Tools", repoName: "stable-audio-tools", repoUrl: "https://github.com/Stability-AI/stable-audio-tools.git", filter: false, - countDownloads: { term: { path: "model.safetensors" } }, + countDownloads: `path:"model.safetensors"`, snippets: snippets.stable_audio_tools, }, "stable-baselines3": { @@ -412,9 +384,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/stable-baselines3", snippets: snippets.stableBaselines3, filter: true, - countDownloads: { - wildcard: { path: "*.zip" }, - }, + countDownloads: `path_extension:"zip"`, }, stanza: { prettyLabel: "Stanza", @@ -423,9 +393,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/stanza", 
snippets: snippets.stanza, filter: true, - countDownloads: { - term: { path: "models/default.zip" }, - }, + countDownloads: `path:"models/default.zip"`, }, tensorflowtts: { prettyLabel: "TensorFlowTTS", @@ -438,16 +406,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoName: "TiC-CLIP", repoUrl: "https://github.com/apple/ml-tic-clip", filter: false, - countDownloads: { wildcard: { path: "checkpoints/*.pt" } }, + countDownloads: `path_extension:"pt" AND path_prefix:"checkpoints"`, }, timesfm: { prettyLabel: "TimesFM", repoName: "timesfm", repoUrl: "https://github.com/google-research/timesfm", filter: false, - countDownloads: { - term: { path: "checkpoints/checkpoint_1100000/state/checkpoint" }, - }, + countDownloads: `path:"checkpoints/checkpoint_1100000/state/checkpoint"`, }, timm: { prettyLabel: "timm", @@ -456,9 +422,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { docsUrl: "https://huggingface.co/docs/hub/timm", snippets: snippets.timm, filter: true, - countDownloads: { - terms: { path: ["pytorch_model.bin", "model.safetensors"] }, - }, + countDownloads: `path:"pytorch_model.bin" OR path:"model.safetensors"`, }, transformers: { prettyLabel: "Transformers", @@ -482,9 +446,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { repoUrl: "https://github.com/Unity-Technologies/sentis-samples", snippets: snippets.sentis, filter: true, - countDownloads: { - wildcard: { path: "*.sentis" }, - }, + countDownloads: `path_extension:"sentis"`, }, voicecraft: { prettyLabel: "VoiceCraft", @@ -497,9 +459,7 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { prettyLabel: "WhisperKit", repoName: "WhisperKit", repoUrl: "https://github.com/argmaxinc/WhisperKit", - countDownloads: { - wildcard: { path: "*/model.mil" }, - }, + countDownloads: `path_filename:"model.mil" AND path_prefix:*`, }, } satisfies Record<string, LibraryUiElement>;