From 074b768ba6eee4fb6dd1b2493c258cee26d2ed75 Mon Sep 17 00:00:00 2001
From: "Rossdan Craig rossdan@lastmileai.dev" <>
Date: Wed, 10 Jan 2024 01:39:03 -0500
Subject: [PATCH 1/3] [HF][streaming][1/n] Text Summarization
TSIA
Adding streaming functionality to the text summarization model parser.
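For context, here is a minimal sketch of the streaming flow this patch wires into the parser. The model name, input text, and `num_beams=1` are illustrative assumptions (streamers don't support beam search); the real parser feeds each chunk to AIConfig's stream callback instead of printing:
```python
import threading

from transformers import AutoTokenizer, TextIteratorStreamer, pipeline

model_name = "t5-small"  # stand-in; the parser gets this from the AIConfig
summarizer = pipeline("summarization", model=model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
streamer = TextIteratorStreamer(tokenizer)

text = "The tower is 324 metres tall, about the same height as an 81-storey building."

# The pipeline call blocks until generation finishes, so run it on a
# thread and consume tokens from the streamer on the main thread.
thread = threading.Thread(
    target=summarizer,
    args=(text,),
    kwargs={"streamer": streamer, "num_beams": 1},  # streamers require non-beam decoding
)
thread.start()

accumulated_message = ""
for new_text in streamer:
    # Special tokens like </s> come through the streamer unfiltered
    accumulated_message += new_text.replace("</s>", "").replace("<s>", "")
thread.join()
print(accumulated_message)
```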
## Test Plan
Rebase onto and test it with https://github.com/lastmile-ai/aiconfig/pull/852/commits/11ace0a5a31b9404a97965e5fa478d5b19adcb67.
Follow the README from AIConfig Editor https://github.com/lastmile-ai/aiconfig/tree/main/python/src/aiconfig/editor#dev, then run these commands:
```bash
aiconfig_path=/Users/rossdancraig/Projects/aiconfig/cookbooks/Gradio/huggingface.aiconfig.json
parsers_path=/Users/rossdancraig/Projects/aiconfig/cookbooks/Gradio/hf_model_parsers.py
alias aiconfig="python3 -m 'aiconfig.scripts.aiconfig_cli'"
aiconfig edit --aiconfig-path=$aiconfig_path --server-port=8080 --server-mode=debug_servers --parsers-module-path=$parsers_path
```
Then in AIConfig Editor, run the prompt (it will stream by default):
https://github.com/lastmile-ai/aiconfig/assets/151060367/e91a1d8b-a3e9-459c-9eb1-2d8e5ec58e73
---
.../local_inference/text_generation.py | 2 +-
.../local_inference/text_summarization.py | 11 +++++++++--
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py
index e0941b0fd..c6218c82e 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_generation.py
@@ -251,7 +251,7 @@ async def run_inference(
not "stream" in completion_data or completion_data.get("stream") != False
)
if should_stream:
- tokenizer : AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
+ tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
streamer = TextIteratorStreamer(tokenizer)
completion_data["streamer"] = streamer
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
index bba735b4f..32b90b908 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_summarization.py
@@ -128,13 +128,18 @@ def construct_stream_output(
"metadata": {},
}
)
+
accumulated_message = ""
for new_text in streamer:
if isinstance(new_text, str):
+ # For some reason these symbols aren't filtered out by the streamer
+ new_text = new_text.replace("</s>", "")
+ new_text = new_text.replace("<s>", "")
+
accumulated_message += new_text
options.stream_callback(new_text, accumulated_message, 0)
-
output.data = accumulated_message
+
return output
@@ -245,7 +250,9 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
# if stream enabled in runtime options and config, then stream. Otherwise don't stream.
streamer = None
- should_stream = (options.stream if options else False) and (not "stream" in completion_data or completion_data.get("stream") != False)
+ should_stream = (options.stream if options else False) and (
+ not "stream" in completion_data or completion_data.get("stream") != False
+ )
if should_stream:
tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
streamer = TextIteratorStreamer(tokenizer)
From 13a4c6ed44a3307807bb5a8b8a8e7407f4f4e45b Mon Sep 17 00:00:00 2001
From: "Rossdan Craig rossdan@lastmileai.dev" <>
Date: Wed, 10 Jan 2024 02:08:54 -0500
Subject: [PATCH 2/3] [HF][streaming][2/n] Text Translation
TSIA
Adding streaming output support for the text translation model parser. I also fixed a bug where we didn't pass the `"translation"` task key into the pipeline.
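To illustrate the bug fix: `pipeline()` takes the task as its first positional argument, so passing only a model name made transformers try to resolve it as a task name. A quick sketch (`t5-small` is a stand-in model, and the explicit `translation_en_to_fr` variant keeps it self-contained; the parser passes the generic `"translation"` task with the model from the AIConfig):
```python
from transformers import pipeline

model_name = "t5-small"  # stand-in; the parser gets this from the AIConfig

# Before (broken): "t5-small" lands in the task slot and the task lookup fails
# translator = pipeline(model_name)  # KeyError: unknown task

# After (fixed): task first, then the model
translator = pipeline("translation_en_to_fr", model_name)
print(translator("Streaming is fun!"))  # [{'translation_text': '...'}]
```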
## Test Plan
Rebase onto and test it: https://github.com/lastmile-ai/aiconfig/commit/5b7434483e2521110bbe3cc3380d9b99a6d4e8be.
Follow the README from AIConfig Editor https://github.com/lastmile-ai/aiconfig/tree/main/python/src/aiconfig/editor#dev, then run these commands:
```bash
aiconfig_path=/Users/rossdancraig/Projects/aiconfig/cookbooks/Gradio/huggingface.aiconfig.json
parsers_path=/Users/rossdancraig/Projects/aiconfig/cookbooks/Gradio/hf_model_parsers.py
alias aiconfig="python3 -m 'aiconfig.scripts.aiconfig_cli'"
aiconfig edit --aiconfig-path=$aiconfig_path --server-port=8080 --server-mode=debug_servers --parsers-module-path=$parsers_path
```
With Streaming
https://github.com/lastmile-ai/aiconfig/assets/151060367/d7bc9df2-2993-4709-bf9b-c5b7979fb00f
Without Streaming
https://github.com/lastmile-ai/aiconfig/assets/151060367/71eb6ab3-5d6f-4c5d-8b82-f3daf4c5e610
---
.../HuggingFace/python/requirements.txt | 5 ++--
.../local_inference/text_translation.py | 24 +++++++++++++++----
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/extensions/HuggingFace/python/requirements.txt b/extensions/HuggingFace/python/requirements.txt
index 6388e1c9e..79e5db10b 100644
--- a/extensions/HuggingFace/python/requirements.txt
+++ b/extensions/HuggingFace/python/requirements.txt
@@ -10,11 +10,12 @@ huggingface_hub
#Hugging Face Libraries - Local Inference Transformers & Diffusers
accelerate # Used to help speed up image generation
-diffusers # Used for image + audio generation
+diffusers # Used for image generation
+scipy # array -> wav file, text-speech. torchaudio.save seems broken.
+sentencepiece # Used for text translation
torch
torchvision
torchaudio
-scipy # array -> wav file, text-speech. torchaudio.save seems broken.
transformers # Used for text generation
#Other
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py
index 9ee8bb357..860a11e46 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_translation.py
@@ -129,12 +129,19 @@ def construct_stream_output(
"metadata": {},
}
)
+
accumulated_message = ""
for new_text in streamer:
if isinstance(new_text, str):
+ # For some reason these symbols aren't filtered out by the streamer
+ new_text = new_text.replace("</s>", "")
+ new_text = new_text.replace("<s>", "")
+ new_text = new_text.replace("<pad>", "")
+
accumulated_message += new_text
options.stream_callback(new_text, accumulated_message, 0)
output.data = accumulated_message
+
return output
@@ -240,19 +247,26 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
model_name: str = aiconfig.get_model_name(prompt)
if isinstance(model_name, str) and model_name not in self.translators:
- self.translators[model_name] = pipeline(model_name)
+ self.translators[model_name] = pipeline("translation", model_name)
translator = self.translators[model_name]
# if stream enabled in runtime options and config, then stream. Otherwise don't stream.
streamer = None
- should_stream = (options.stream if options else False) and (not "stream" in completion_data or completion_data.get("stream") != False)
+ should_stream = (options.stream if options else False) and (
+ not "stream" in completion_data or completion_data.get("stream") != False
+ )
if should_stream:
- raise NotImplementedError("Streaming is not supported for HuggingFace Text Translation")
+ tokenizer: AutoTokenizer = AutoTokenizer.from_pretrained(model_name)
+ streamer = TextIteratorStreamer(tokenizer)
+ completion_data["streamer"] = streamer
+
+ def _translate():
+ return translator(inputs, **completion_data)
outputs: List[Output] = []
output = None
if not should_stream:
- response: List[Any] = translator(inputs, **completion_data)
+ response: List[Any] = _translate()
for count, result in enumerate(response):
output = construct_regular_output(result, count)
outputs.append(output)
@@ -263,7 +277,7 @@ async def run_inference(self, prompt: Prompt, aiconfig: "AIConfigRuntime", optio
raise ValueError("Stream option is selected but streamer is not initialized")
# For streaming, cannot call `translator` directly otherwise response will be blocking
- thread = threading.Thread(target=translator, kwargs=completion_data)
+ thread = threading.Thread(target=_translate)
thread.start()
output = construct_stream_output(streamer, options)
if output is not None:
From 1f161e5b4fafdad0980735d45f66f532ea993749 Mon Sep 17 00:00:00 2001
From: "Rossdan Craig rossdan@lastmileai.dev" <>
Date: Wed, 10 Jan 2024 02:49:07 -0500
Subject: [PATCH 3/3] [HF][streaming][3/n] Text2Speech (no streaming, but
updating docs on completion params)
Ok, this one is weird. Today, the Transformers library only supports streaming for text outputs. See `BaseStreamer` here: https://github.com/search?q=repo%3Ahuggingface%2Ftransformers%20BaseStreamer&type=code
It may support other formats in the future, but not yet. For comparison, OpenAI already supports streaming text-to-speech: https://community.openai.com/t/streaming-from-text-to-speech-api/493784
Anyway, here I only updated the docs to clarify why the completion params are empty. Jonathan and I synced about this briefly offline, but I forgot again, so I'm capturing it here so no one forgets.
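For reference, a sketch of the one completion param the linked docs do mention, `forward_params` carrying `speaker_embeddings`. The model and the random embedding are illustrative stand-ins (a real speaker embedding is a 512-dim vector from e.g. a speaker verification model):
```python
import torch
from transformers import pipeline

synthesizer = pipeline("text-to-speech", model="microsoft/speecht5_tts")

# Stand-in embedding; real ones come from a speaker verification model
speaker_embeddings = torch.randn(1, 512)
speech = synthesizer(
    "Hello from AIConfig!",
    forward_params={"speaker_embeddings": speaker_embeddings},
)
# No streaming here: the pipeline returns the whole clip at once
print(speech["sampling_rate"], speech["audio"].shape)
```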
---
.../local_inference/text_2_speech.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
index 85dee4add..97e172fde 100644
--- a/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
+++ b/extensions/HuggingFace/python/src/aiconfig_extension_hugging_face/local_inference/text_2_speech.py
@@ -25,6 +25,8 @@
# Step 1: define Helpers
def refine_pipeline_creation_params(model_settings: Dict[str, Any]) -> List[Dict[str, Any]]:
+ # These are from the transformers GitHub repo:
+ # https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_utils.py#L2534
supported_keys = {
"torch_dtype",
"force_download",
@@ -61,9 +63,11 @@ def refine_pipeline_creation_params(model_settings: Dict[str, Any]) -> List[Dict
def refine_completion_params(unfiltered_completion_params: Dict[str, Any]) -> Dict[str, Any]:
- supported_keys = {
- # ???
- }
+ # Note: there seem to be no public API docs on which completion
+ # params are supported for text-to-speech:
+ # https://huggingface.co/docs/transformers/tasks/text-to-speech#inference
+ # The only one mentioned is `forward_params`, which can contain `speaker_embeddings`.
+ supported_keys = {}
completion_params: Dict[str, Any] = {}
for key in unfiltered_completion_params: