diff --git a/example/LLM/musicgen/README.md b/example/LLM/musicgen/README.md
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/example/LLM/musicgen/runtime.yaml b/example/LLM/musicgen/runtime.yaml
deleted file mode 100644
index cc31fd72dd..0000000000
--- a/example/LLM/musicgen/runtime.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-# from https://github.com/facebookresearch/audiocraft/tree/main#installation
-name: musicgen
-mode: venv
-environment:
- arch: noarch
- os: ubuntu:20.04
- cuda: 11.7
- python: 3.9
-configs:
- pip:
- index_url: https://mirrors.aliyun.com/pypi/simple
-dependencies:
- - pip:
- # use the fixed commit of audiocraft
- - audiocraft @ git+https://github.com/facebookresearch/audiocraft.git@c9179f87d9f560c96cf6ec4de65a8fcf374c91c5
- - torch >= 2.0
diff --git a/example/text-to-music/README.md b/example/text-to-music/README.md
new file mode 100644
index 0000000000..64bfe16ba7
--- /dev/null
+++ b/example/text-to-music/README.md
@@ -0,0 +1,21 @@
+Text To Music
+======
+
+Examples of offline batch evaluation and an online demo for text-to-music models.
+
+Models
+------
+
+- [MusicGen](https://ai.honu.io/papers/musicgen/): a single Language Model (LM) for conditional music generation that creates high-quality music from text descriptions or melodies.
+
+Datasets
+------
+
+Prompt samples collected from the MusicGen demo website, packaged as the `musicgen-mini` Starwhale dataset.
+
+What do we learn from these examples?
+------
+
+- Build a Starwhale Model for text-to-music models.
+- Log audio artifacts in the evaluation phase (see the sketch after this list).
+- Write an evaluation results summary with Starwhale Report.
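+
+A minimal sketch of the audio-logging pattern (mirroring `musicgen/evaluation.py`): the predict handler returns a `starwhale.Audio` object so the generated clip is stored and viewable alongside the evaluation results. `generate_wav_bytes` is a hypothetical stand-in for the real model call.
+
+```python
+import io
+import wave
+
+from starwhale import Audio, MIMEType, evaluation
+
+
+def generate_wav_bytes(desc: str) -> bytes:
+    """Hypothetical stand-in for a real text-to-music model: one second of silence."""
+    buf = io.BytesIO()
+    with wave.open(buf, "wb") as w:
+        w.setnchannels(1)
+        w.setsampwidth(2)
+        w.setframerate(32000)
+        w.writeframes(b"\x00\x00" * 32000)
+    return buf.getvalue()
+
+
+@evaluation.predict
+def music_predict(data: dict) -> Audio:
+    # each dataset row is a dict with a "desc" text field (see datasets/musicgen-mini.py)
+    wav_bytes = generate_wav_bytes(data["desc"])
+    # returning a starwhale.Audio makes the clip part of the logged evaluation results
+    return Audio(fp=wav_bytes, mime_type=MIMEType.WAV)
+```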
diff --git a/example/text-to-music/datasets/README.md b/example/text-to-music/datasets/README.md
new file mode 100644
index 0000000000..c1193d305b
--- /dev/null
+++ b/example/text-to-music/datasets/README.md
@@ -0,0 +1,18 @@
+Datasets for text-to-music
+======
+
+MusicGen mini
+------
+
+Run the following command to build the dataset:
+
+```bash
+python musicgen-mini.py
+```
+
+Run the following swcli command to show the built dataset:
+
+```bash
+swcli dataset info musicgen-mini
+```
+
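+To add your own prompts to the same dataset, here is a minimal sketch reusing the SDK calls from `musicgen-mini.py`; the example descriptions and the starting index are placeholders, not part of the original sample set:
+
+```python
+from starwhale import dataset
+
+extra_descs = [
+    "lo-fi chill beat with soft piano and vinyl crackle",  # placeholder prompt
+    "orchestral fanfare with brass and timpani",  # placeholder prompt
+]
+
+with dataset("musicgen-mini") as ds:
+    start = 1000  # pick an index range that does not collide with the built-in samples
+    for i, desc in enumerate(extra_descs):
+        ds[start + i] = {"desc": desc}
+    ds.commit()
+```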
diff --git a/example/LLM/musicgen/dataset.py b/example/text-to-music/datasets/musicgen-mini.py
similarity index 91%
rename from example/LLM/musicgen/dataset.py
rename to example/text-to-music/datasets/musicgen-mini.py
index ee97a5bebc..5fe59b3777 100644
--- a/example/LLM/musicgen/dataset.py
+++ b/example/text-to-music/datasets/musicgen-mini.py
@@ -1,7 +1,7 @@
from starwhale import dataset
from starwhale.utils.debug import init_logger
-init_logger(4)
+init_logger(3)
# data from https://ai.honu.io/papers/musicgen/
desc_samples = [
@@ -29,15 +29,15 @@
"An energetic hip-hop music piece, with synth sounds and strong bass. There is a rhythmic hi-hat patten in the drums."
"90s rock song with electric guitar and heavy drums",
"An 80s driving pop song with heavy drums and synth pads in the background",
- " An energetic hip-hop music piece, with synth sounds and strong bass. There is a rhythmic hi-hat patten in the drums.",
+ "An energetic hip-hop music piece, with synth sounds and strong bass. There is a rhythmic hi-hat patten in the drums.",
]
def build_dataset() -> None:
print("Building musicgen dataset...")
with dataset("musicgen-mini") as ds:
- for desc in desc_samples:
- ds.append({"desc": desc})
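+        # assign rows by explicit index so every sample gets a stable key in the dataset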
+ for idx, desc in enumerate(desc_samples):
+ ds[idx] = {"desc": desc}
ds.commit()
diff --git a/example/LLM/musicgen/.gitignore b/example/text-to-music/musicgen/.gitignore
similarity index 100%
rename from example/LLM/musicgen/.gitignore
rename to example/text-to-music/musicgen/.gitignore
diff --git a/example/text-to-music/musicgen/README.md b/example/text-to-music/musicgen/README.md
new file mode 100644
index 0000000000..f8376f9087
--- /dev/null
+++ b/example/text-to-music/musicgen/README.md
@@ -0,0 +1,47 @@
+MusicGen Example Guides
+======
+
+[MusicGen](https://ai.honu.io/papers/musicgen/) is a single Language Model (LM) that operates over several streams of compressed discrete music representation.
+
+- 🏔️ Homepage: https://ai.honu.io/papers/musicgen/
+- 🌋 GitHub: https://github.com/facebookresearch/audiocraft
+- 🏕️ Size: small(300M), melody(1.5B), medium(1.5B), large(3.3B)
+
+Login Starwhale Cloud
+------
+
+```bash
+swcli instance login --token "${TOKEN}" --alias cloud-cn https://cloud.starwhale.cn/
+```
+
+Build Starwhale Runtime
+------
+
+```bash
+swcli -vvv runtime build
+swcli runtime cp musicgen https://cloud.starwhale.cn/project/starwhale:llm_text_to_audio
+```
+
+Build Starwhale Model
+------
+
+Model name choices: `melody`, `medium`, `small` and `large`.
+
+```bash
+# general form
+python3 build.py ${model_name}
+
+# example: build the small model in the musicgen runtime and copy it to the cloud project
+swcli runtime activate musicgen
+python3 build.py small
+swcli model cp musicgen-small https://cloud.starwhale.cn/project/starwhale:llm_text_to_audio
+```
+
+Run Starwhale Model
+------
+
+```bash
+# run the evaluation from the model source directory
+swcli model run --workdir . --runtime musicgen --dataset musicgen-mini -m evaluation
+
+# run the evaluation from the built model package
+swcli model run --uri musicgen-small --runtime musicgen --dataset musicgen-mini
+```
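+
+`evaluation.py` reads its generation parameters from environment variables: `TOP_K`, `TOP_P`, `TEMPERATURE`, `MAX_INPUT_LENGTH`, `MAX_NEW_TOKENS` (roughly 50 new tokens per second of audio) and `GUIDANCE_SCALE`. A sketch of overriding them for a local run, assuming the variables are exported in the shell that launches `swcli`:
+
+```bash
+# about 30 seconds of audio per sample, with a slightly higher sampling temperature
+export MAX_NEW_TOKENS=1500
+export TEMPERATURE=1.2
+swcli model run --workdir . --runtime musicgen --dataset musicgen-mini -m evaluation
+```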
diff --git a/example/LLM/musicgen/build.py b/example/text-to-music/musicgen/build.py
similarity index 79%
rename from example/LLM/musicgen/build.py
rename to example/text-to-music/musicgen/build.py
index ee3c69acc0..1624d7502d 100644
--- a/example/LLM/musicgen/build.py
+++ b/example/text-to-music/musicgen/build.py
@@ -5,7 +5,7 @@
from huggingface_hub import snapshot_download
from starwhale import model as starwhale_model
-from starwhale.utils import debug
+from starwhale import init_logger
try:
from .utils import (
@@ -22,7 +22,13 @@
)
from evaluation import music_predict
-debug.init_logger(4)
+# init_logger configures the logging level of starwhale:
+# -> 0: ERROR (default)
+# -> 1: WARNING
+# -> 2: INFO
+# -> 3: DEBUG
+# -> 4: TRACE
+init_logger(3)
def build_starwhale_model(model_name: str) -> None:
@@ -39,6 +45,8 @@ def build_starwhale_model(model_name: str) -> None:
)
prepare_build_model_package(model_name)
+
+    # Use the Starwhale SDK to build the Starwhale model package; `swcli model build` is also available.
starwhale_model.build(name=f"musicgen-{model_name}", modules=[music_predict])
diff --git a/example/LLM/musicgen/evaluation.py b/example/text-to-music/musicgen/evaluation.py
similarity index 54%
rename from example/LLM/musicgen/evaluation.py
rename to example/text-to-music/musicgen/evaluation.py
index dac503bc14..641cf2f958 100644
--- a/example/LLM/musicgen/evaluation.py
+++ b/example/text-to-music/musicgen/evaluation.py
@@ -4,9 +4,8 @@
import typing as t
from tempfile import NamedTemporaryFile
-from audiocraft.models import MusicGen
+from transformers import AutoProcessor, MusicgenForConditionalGeneration
from audiocraft.data.audio import audio_write
-from audiocraft.models.loaders import load_lm_model, load_compression_model
from starwhale import Audio, MIMEType, evaluation
@@ -15,36 +14,31 @@
except ImportError:
from utils import get_model_name, PRETRAINED_MODELS_DIR
-duration = int(os.environ.get("DURATION", 10))
top_k = int(os.environ.get("TOP_K", 250))
top_p = int(os.environ.get("TOP_P", 0))
temperature = float(os.environ.get("TEMPERATURE", 1.0))
-cfg_coef = float(os.environ.get("CFG_COEF", 3.0))
max_input_length = int(os.environ.get("MAX_INPUT_LENGTH", 512))
+max_new_tokens = int(os.environ.get("MAX_NEW_TOKENS", 500)) # 500 tokens = 10 seconds
+guidance_scale = float(os.environ.get("GUIDANCE_SCALE", 3.0))
_g_model = None
+_g_processor = None
_g_model_name = None
-def _load_model() -> t.Tuple[MusicGen, str]:
- global _g_model, _g_model_name
+def _load_model() -> t.Tuple[t.Any, t.Any, str]:
+ global _g_model, _g_model_name, _g_processor
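+    # load the model and processor lazily and cache them in module globals so repeated predictions reuse them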
- if _g_model is None or _g_model_name is None:
- model_name = get_model_name()
- device = "cuda"
- c_model = load_compression_model(
- PRETRAINED_MODELS_DIR / model_name / "compression_state_dict.bin",
- device=device,
+ if _g_model is None or _g_model_name is None or _g_processor is None:
+ _g_model_name = get_model_name()
+ _g_model = MusicgenForConditionalGeneration.from_pretrained(
+ PRETRAINED_MODELS_DIR / _g_model_name
)
- l_model = load_lm_model(
- PRETRAINED_MODELS_DIR / model_name / "state_dict.bin", device=device
+ _g_model.to("cuda")
+ _g_processor = AutoProcessor.from_pretrained(
+ PRETRAINED_MODELS_DIR / _g_model_name
)
- if model_name == "melody":
- l_model.condition_provider.conditioners["self_wav"].match_len_on_eval = True
- _g_model = MusicGen(model_name, c_model, l_model)
- _g_model_name = model_name
-
- return _g_model, _g_model_name
+ return _g_model, _g_processor, _g_model_name
@evaluation.predict(
@@ -55,30 +49,36 @@ def _load_model() -> t.Tuple[MusicGen, str]:
)
def music_predict(data: dict) -> Audio:
# TODO: support batch prediction
- model, _ = _load_model()
- model.set_generation_params(
- duration=duration,
+ model, processor, _ = _load_model()
+ inputs = processor(
+ text=[data["desc"][:max_input_length]],
+ padding=True,
+ return_tensors="pt",
+ )
+ # TODO: support melody
+ outputs = model.generate(
+ **inputs.to("cuda"),
+ do_sample=True,
+ guidance_scale=guidance_scale,
+ max_new_tokens=max_new_tokens,
top_k=top_k,
top_p=top_p,
temperature=temperature,
- cfg_coef=cfg_coef,
)
- # TODO: support melody
- outputs = model.generate([data.desc[:max_input_length]])
output = outputs[0].detach().cpu().float()
with NamedTemporaryFile("wb", suffix=".wav", delete=True) as file:
fpath = audio_write(
stem_name=file.name,
wav=output,
- sample_rate=model.sample_rate,
+ sample_rate=model.config.audio_encoder.sampling_rate,
strategy="loudness",
loudness_headroom_db=16,
loudness_compressor=True,
add_suffix=False,
)
audio = Audio(
- fp=fpath,
+ fp=fpath.read_bytes(),
mime_type=MIMEType.WAV,
)
return audio
diff --git a/example/text-to-music/musicgen/requirements.txt b/example/text-to-music/musicgen/requirements.txt
new file mode 100644
index 0000000000..fc9b239762
--- /dev/null
+++ b/example/text-to-music/musicgen/requirements.txt
@@ -0,0 +1,4 @@
+torch==2.0.1
+transformers==4.31.0
+huggingface-hub # download hf models
+audiocraft==1.0.0
diff --git a/example/text-to-music/musicgen/runtime.yaml b/example/text-to-music/musicgen/runtime.yaml
new file mode 100644
index 0000000000..f1275a0476
--- /dev/null
+++ b/example/text-to-music/musicgen/runtime.yaml
@@ -0,0 +1,10 @@
+name: musicgen
+mode: venv
+environment:
+ arch: noarch
+ os: ubuntu:20.04
+ cuda: 11.7
+ python: 3.9
+ starwhale_version: 0.6.1 # Starwhale >= 0.6.0 supports logging artifacts in the evaluation phase.
+dependencies:
+ - requirements.txt
diff --git a/example/LLM/musicgen/utils.py b/example/text-to-music/musicgen/utils.py
similarity index 100%
rename from example/LLM/musicgen/utils.py
rename to example/text-to-music/musicgen/utils.py