Fixed blocking calls to async functions when loading/saving AudioSegments
nimroddolev · Jun 6, 2024 · 0bb4fa6 · 0bb4fa6
1 parent e44d273
commit 0bb4fa6
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 32 deletions.
diff --git a/custom_components/chime_tts/__init__.py b/custom_components/chime_tts/__init__.py
@@ -7,6 +7,7 @@
 from datetime import datetime
 
 from pydub import AudioSegment # type: ignore
+from pydub.exceptions import CouldntDecodeError
 
 from homeassistant.components.media_player.const import (
     ATTR_MEDIA_CONTENT_ID,
@@ -535,7 +536,7 @@ async def async_request_tts_audio(
             if file is None:
                 _LOGGER.error("...could not convert TTS bytes to audio")
                 return None
-            audio = AudioSegment.from_file(file)
+            audio = await filesystem_helper.async_load_audio(file)
             if audio is not None:
 
                 # Done
@@ -712,7 +713,9 @@ async def async_get_playback_audio_path(params: dict, options: dict):
     if output_audio is not None:
         initial_save_folder_key = TEMP_PATH_KEY if is_local else WWW_PATH_KEY
         _LOGGER.debug(" - Saving mp3 file to folder: %s...", _data.get(initial_save_folder_key, ""))
-        new_audio_file = filesystem_helper.save_audio_to_folder(output_audio, _data.get(initial_save_folder_key, None))
+        new_audio_file = await filesystem_helper.async_save_audio_to_folder(
+            output_audio,
+            _data.get(initial_save_folder_key, None))
         if new_audio_file is None:
             _LOGGER.debug("   ...error saving file")
             return None
@@ -728,7 +731,14 @@ async def async_get_playback_audio_path(params: dict, options: dict):
             else:
                 _LOGGER.warning("...FFmpeg audio conversion failed. Continuing using the original audio file")
 
-        duration = len(AudioSegment.from_file(new_audio_file)) / 1000.0
+        try:
+            new_audio_segment = await filesystem_helper.async_load_audio(new_audio_file)
+        except CouldntDecodeError:
+            raise ValueError("The file format is not supported or the file is corrupted.")
+        except Exception as e:
+            raise RuntimeError(f"An unexpected error occurred: {e}")
+
+        duration = len(new_audio_segment) / 1000.0
         audio_dict[AUDIO_DURATION_KEY] = duration
         audio_dict[LOCAL_PATH_KEY if is_local else PUBLIC_PATH_KEY] = new_audio_file
         audio_dict[ATTR_MEDIA_CONTENT_ID] = media_player_helper.get_media_content_id(audio_dict.get(LOCAL_PATH_KEY, None)
@@ -739,7 +749,9 @@ async def async_get_playback_audio_path(params: dict, options: dict):
         for folder_key in [(LOCAL_PATH_KEY if is_local else None), (PUBLIC_PATH_KEY if is_public else None)]:
             if folder_key is not None and audio_dict.get(folder_key, None) is None:
                 _LOGGER.debug(" - Saving generated audio to folder %s ...", _data.get(folder_key, ""))
-                audio_dict[folder_key] = filesystem_helper.save_audio_to_folder(output_audio, _data.get(folder_key, None))
+                audio_dict[folder_key] = await filesystem_helper.async_save_audio_to_folder(
+                    output_audio,
+                    _data.get(folder_key, None))
                 if audio_dict[folder_key] is None:
                     _LOGGER.error("Error saving audio to folder %s...", _data.get(LOCAL_PATH_KEY, ""))
             # Save path to cache
@@ -863,7 +875,7 @@ async def async_process_segments(hass, message, output_audio, params, options):
                 }
                 segment_filepath_hash = get_filename_hash_from_service_data({**segment_params}, {**segment_options})
 
-                tts_audio = None
+                tts_audio: AudioSegment = None
 
                 # Use cached TTS audio
                 if segment_cache is True:
@@ -896,8 +908,9 @@ async def async_process_segments(hass, message, output_audio, params, options):
                         tts_audio_duration = float(len(tts_audio) / 1000.0)
                         if segment_cache is True and audio_dict is None:
                             _LOGGER.debug(" - Saving generated TTS audio to cache...")
-                            tts_audio_full_path = filesystem_helper.save_audio_to_folder(
-                                tts_audio, _data.get(TEMP_PATH_KEY, None))
+                            tts_audio_full_path = await filesystem_helper.async_save_audio_to_folder(
+                                tts_audio,
+                                _data.get(TEMP_PATH_KEY, None))
                             if tts_audio_full_path is not None:
                                 audio_dict = {
                                     LOCAL_PATH_KEY: tts_audio_full_path,
@@ -910,10 +923,10 @@ async def async_process_segments(hass, message, output_audio, params, options):
 
                 # TTS Audio manipulations
                 if tts_audio is not None:
-                    temp_folder =  _data.get(TEMP_PATH_KEY, None)
-                    tts_audio = helpers.change_speed_of_audiosegment(tts_audio, segment_tts_speed, temp_folder)
-                    tts_audio = helpers.change_pitch_of_audiosegment(tts_audio, segment_tts_pitch, temp_folder)
-                    tts_audio = helpers.ffmpeg_convert_from_audio_segment(tts_audio, segment_audio_conversion, temp_folder)
+                    temp_folder: str = _data.get(TEMP_PATH_KEY, None)
+                    tts_audio = await helpers.async_change_speed_of_audiosegment(tts_audio, segment_tts_speed, temp_folder)
+                    tts_audio = await helpers.async_change_pitch_of_audiosegment(tts_audio, segment_tts_pitch, temp_folder)
+                    tts_audio = await helpers.async_ffmpeg_convert_from_audio_segment(tts_audio, segment_audio_conversion, temp_folder)
 
                 # Combine audio
                 if tts_audio is not None:
@@ -967,10 +980,10 @@ async def async_get_audio_from_path(
 
         _LOGGER.debug(' - Retrieving audio from path: "%s"...', filepath)
         try:
-            audio_from_path: AudioSegment = AudioSegment.from_file(filepath)
+            audio_from_path: AudioSegment = await filesystem_helper.async_load_audio(filepath)
             if audio_conversion is not None and len(audio_conversion) > 0:
                 _LOGGER.debug("  - Performing FFmpeg audio conversion of audio file...")
-                audio_from_path = helpers.ffmpeg_convert_from_audio_segment(audio_from_path)
+                audio_from_path = await helpers.async_ffmpeg_convert_from_audio_segment(audio_from_path)
 
             # Remove downloaded file when cache=false
             if cache is False and file_hash is not None:

diff --git a/custom_components/chime_tts/helpers/filesystem.py b/custom_components/chime_tts/helpers/filesystem.py
@@ -8,6 +8,7 @@
 from io import BytesIO
 import re
 import requests
+import asyncio
 from pydub import AudioSegment
 from homeassistant.helpers.network import get_url
 from homeassistant.core import HomeAssistant
@@ -126,7 +127,7 @@ def get_downloaded_chime_path(self, folder: str, url: str):
         """Local file path string for chime URL in local folder."""
         return folder + ("" if folder.endswith("/") else "/") + re.sub(r'[\/:*?"<>|]', '_', url.replace("https://", "").replace("http://", ""))
 
-    def save_audio_to_folder(self, audio, folder, file_name: str = None):
+    async def async_save_audio_to_folder(self, audio: AudioSegment, folder, file_name: str = None):
         """Save audio to local folder."""
 
         folder_exists = self.create_folder(folder)
@@ -141,7 +142,7 @@ def save_audio_to_folder(self, audio, folder, file_name: str = None):
                     prefix=folder, suffix=".mp3"
                 ) as temp_obj:
                     audio_full_path = temp_obj.name
-                audio.export(audio_full_path, format="mp3")
+                await self.export_audio(audio, audio_full_path)
             except Exception as error:
                 _LOGGER.warning(
                     "An error occurred when creating the temp mp3 file: %s", error
@@ -154,7 +155,7 @@ def save_audio_to_folder(self, audio, folder, file_name: str = None):
                 if audio_full_path and isinstance(audio_full_path, str):
                     if os.path.exists(audio_full_path):
                         os.remove(audio_full_path)
-                    audio.export(audio_full_path, format="mp3")
+                    await self.export_audio(audio, audio_full_path)
             except Exception as error:
                 _LOGGER.warning(
                     "An error occurred when creating the mp3 file: %s", error
@@ -199,13 +200,14 @@ async def async_download_file(self, hass: HomeAssistant, url, folder):
             _LOGGER.debug("Audio downloaded successfully")
             _, file_extension = os.path.splitext(url)
             try:
-                audio_content = AudioSegment.from_file(BytesIO(response.content),
-                                                       format=file_extension.replace(".", ""))
+                audio_content = await self.async_load_audio(
+                    BytesIO(response.content),
+                    format=file_extension.replace(".", ""))
             except Exception as error:
                 _LOGGER.warning("Error when loading audio from downloaded file: %s", str(error))
                 return None
             if audio_content is not None:
-                audio_file_path = self.save_audio_to_folder(audio=audio_content,
+                audio_file_path = self.async_save_audio_to_folder(audio=audio_content,
                                                             folder=folder,
                                                             file_name=url)
                 audio_duration = float(len(audio_content) / 1000)
@@ -315,3 +317,13 @@ def make_folder_path_safe(self, path):
             path = f"{path}/"
         path = path.replace("//", "/")
         return path
+
+    ### Offloading to asyncio.to_thread ####
+
+    async def export_audio(self, audio: AudioSegment, audio_full_path: str):
+        """Save AudioSegment to a filepath."""
+        await asyncio.to_thread(audio.export, audio_full_path, format="mp3")
+
+    async def async_load_audio(self, file_path: str):
+        """Load AudioSegment from a filepath."""
+        return await asyncio.to_thread(AudioSegment.from_file, file_path)
diff --git a/custom_components/chime_tts/helpers/helpers.py b/custom_components/chime_tts/helpers/helpers.py
@@ -383,7 +383,7 @@ def get_default_tts_platform(self, hass: HomeAssistant, default_tts_platform: st
         _LOGGER.error("Chime TTS could not find any TTS platforms installed. Please add at least 1 TTS integration: https://www.home-assistant.io/integrations/#text-to-speech")
         return False
 
-    def ffmpeg_convert_from_audio_segment(self,
+    async def async_ffmpeg_convert_from_audio_segment(self,
                                           audio_segment: AudioSegment = None,
                                           ffmpeg_args: str = "",
                                           folder: str = ""):
@@ -394,9 +394,10 @@ def ffmpeg_convert_from_audio_segment(self,
 
         # Save to temp file
         temp_filename = "temp_segment.mp3"
-        temp_audio_file = filesystem_helper.save_audio_to_folder(audio=audio_segment,
-                                                                 folder=folder,
-                                                                 file_name=temp_filename)
+        temp_audio_file = await filesystem_helper.async_save_audio_to_folder(
+            audio=audio_segment,
+            folder=folder,
+            file_name=temp_filename)
         if not temp_audio_file:
             full_path = f"{folder}/{temp_filename}"
             _LOGGER.warning("ffmpeg_convert_from_audio_segment - Unable to store audio segment to: %s", full_path)
@@ -410,7 +411,7 @@ def ffmpeg_convert_from_audio_segment(self,
         # Load new AudioSegment from converted file
         else:
             try:
-                ret_val = AudioSegment.from_file(str(converted_audio_file))
+                ret_val = await filesystem_helper.async_load_audio(str(converted_audio_file))
             except Exception as error:
                 _LOGGER.warning("ffmpeg_convert_from_audio_segment - Unable to load converted audio segment from file: %s. Error: %s",
                                 str(converted_audio_file), error)
@@ -527,7 +528,7 @@ def add_atempo_values_to_ffmpeg_args_string(self, tempo: float, ffmpeg_args_stri
 
         return ffmpeg_args_string
 
-    def change_speed_of_audiosegment(self, audio_segment: AudioSegment, speed: float = 100.0, temp_folder: str = None):
+    async def async_change_speed_of_audiosegment(self, audio_segment: AudioSegment, speed: float = 100.0, temp_folder: str = None):
         """Change the playback speed of an audio segment."""
         if not audio_segment or speed == 100 or speed < 1 or speed > 500:
             if not audio_segment:
@@ -542,11 +543,12 @@ def change_speed_of_audiosegment(self, audio_segment: AudioSegment, speed: float
 
         ffmpeg_args_string = self.add_atempo_values_to_ffmpeg_args_string(tempo)
 
-        return self.ffmpeg_convert_from_audio_segment(audio_segment=audio_segment,
-                                                      ffmpeg_args=ffmpeg_args_string,
-                                                      folder=temp_folder)
+        return await self.async_ffmpeg_convert_from_audio_segment(
+            audio_segment=audio_segment,
+            ffmpeg_args=ffmpeg_args_string,
+            folder=temp_folder)
 
-    def change_pitch_of_audiosegment(self, audio_segment: AudioSegment, pitch: int = 0, temp_folder: str = None):
+    async def async_change_pitch_of_audiosegment(self, audio_segment: AudioSegment, pitch: int = 0, temp_folder: str = None):
         """Change the pitch of an audio segment."""
         if not audio_segment:
             _LOGGER.warning("Cannot change TTS audio pitch. No audio available")
@@ -566,9 +568,10 @@ def change_pitch_of_audiosegment(self, audio_segment: AudioSegment, pitch: int =
         frame_rate = audio_segment.frame_rate
         ffmpeg_args_string = f"-af asetrate={frame_rate}*{pitch_shift}"
         ffmpeg_args_string = self.add_atempo_values_to_ffmpeg_args_string(tempo_adjustment, ffmpeg_args_string)
-        return self.ffmpeg_convert_from_audio_segment(audio_segment=audio_segment,
-                                                      ffmpeg_args=ffmpeg_args_string,
-                                                      folder=temp_folder)
+        return await self.async_ffmpeg_convert_from_audio_segment(
+            audio_segment=audio_segment,
+            ffmpeg_args=ffmpeg_args_string,
+            folder=temp_folder)
 
     def combine_audio(self,
                       audio_1: AudioSegment,