From 3fe694706ccb026f11c46da3edfd8fe72d16a21e Mon Sep 17 00:00:00 2001 From: Nagesh Kumar Mishra Date: Wed, 3 Jan 2024 20:18:49 +0530 Subject: [PATCH 1/3] Implemented OpenAI Text-to-Speech, added configuration key, and updated audio player for data handling. --- .../Voice AI.xcodeproj/project.pbxproj | 10 ++++ voice/voice-ai/x/Actions/AudioPlayer.swift | 13 +++++ .../x/AppConfiguration/AppConfig.swift | 9 ++- .../OpenAITextToSpeech.swift | 56 +++++++++++++++++++ .../SpeechRecognition/SpeechRecognition.swift | 16 +++++- 5 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift diff --git a/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj b/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj index 7cc9ef9f9..f84c093fe 100644 --- a/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj +++ b/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj @@ -241,6 +241,10 @@ B9B331A32AFB849000F6A9C9 /* StoreKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9B331A22AFB849000F6A9C9 /* StoreKit.framework */; }; B9BA337E2AE683EF00D7756D /* AudioPlayer.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9BA337D2AE683EF00D7756D /* AudioPlayer.swift */; }; B9BB0FE02B3C69FC00E663F6 /* TwitterUI.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9BB0FDF2B3C69FC00E663F6 /* TwitterUI.swift */; }; + B9BB0FE22B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9BB0FE12B459FB100E663F6 /* OpenAITextToSpeech.swift */; }; + B9BB0FE32B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9BB0FE12B459FB100E663F6 /* OpenAITextToSpeech.swift */; }; + B9BB0FE42B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9BB0FE12B459FB100E663F6 /* OpenAITextToSpeech.swift */; }; + B9BB0FE52B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9BB0FE12B459FB100E663F6 /* OpenAITextToSpeech.swift */; }; B9C4A81F2AEE594900327529 /* MockSpeechRecognition.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9C4A81E2AEE594900327529 /* MockSpeechRecognition.swift */; }; B9C4A8282AEE867A00327529 /* ActionHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9C4A8262AEE861C00327529 /* ActionHandler.swift */; }; B9C4A8292AEE867E00327529 /* ActionHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9C4A8262AEE861C00327529 /* ActionHandler.swift */; }; @@ -484,6 +488,7 @@ B9B331A22AFB849000F6A9C9 /* StoreKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = StoreKit.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.0.sdk/System/Library/Frameworks/StoreKit.framework; sourceTree = DEVELOPER_DIR; }; B9BA337D2AE683EF00D7756D /* AudioPlayer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioPlayer.swift; sourceTree = ""; }; B9BB0FDF2B3C69FC00E663F6 /* TwitterUI.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TwitterUI.swift; sourceTree = ""; }; + B9BB0FE12B459FB100E663F6 /* OpenAITextToSpeech.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpenAITextToSpeech.swift; sourceTree = ""; }; B9C4A81E2AEE594900327529 /* MockSpeechRecognition.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MockSpeechRecognition.swift; sourceTree = ""; }; B9C4A8262AEE861C00327529 /* ActionHandler.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = ActionHandler.swift; sourceTree = ""; }; CD0D13342ADA73B300031EDD /* Voice AI.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "Voice AI.app"; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -823,6 +828,7 @@ isa = PBXGroup; children = ( B9AD50532ADFE8A0006F18A1 /* SpeechRecognition.swift */, + B9BB0FE12B459FB100E663F6 /* OpenAITextToSpeech.swift */, ); path = SpeechRecognition; sourceTree = ""; @@ -1372,6 +1378,7 @@ CDC137162B11B129003386E9 /* TimeLogger.swift in Sources */, B91F05B12B049F720029A32D /* AppleSignInManager.swift in Sources */, E4D0D8E42B2384F800F717A2 /* AlertManager.swift in Sources */, + B9BB0FE52B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */, B91F05B72B04B3E00029A32D /* KeychainService.swift in Sources */, B91F05BB2B04BA250029A32D /* NetworkManager.swift in Sources */, A46B5A7C2AE73CE600C874ED /* AudioPlayer.swift in Sources */, @@ -1441,6 +1448,7 @@ B9AD50542ADFE8A0006F18A1 /* SpeechRecognition.swift in Sources */, 224BECD72B20AC9100C84602 /* LogStore.swift in Sources */, B91F05C22B04C0D50029A32D /* CreateUser.swift in Sources */, + B9BB0FE22B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */, B9AD50612ADFE9B7006F18A1 /* Usage.swift in Sources */, B9BA337E2AE683EF00D7756D /* AudioPlayer.swift in Sources */, AC66DF302B2706E100DDC802 /* RandomTrivia.swift in Sources */, @@ -1469,6 +1477,7 @@ buildActionMask = 2147483647; files = ( F614BECA2B18E0FB00C71E32 /* AppleSignInManagerTests.swift in Sources */, + B9BB0FE32B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */, B3D0A3442AF29B1B00E8B0DA /* MockNetworkService.swift in Sources */, B919B7BF2AF3C3F7006335D1 /* AudioEngineAndSessionTests.swift in Sources */, 6EC2F6292B067E91002EFADD /* KeychainService.swift in Sources */, @@ -1607,6 +1616,7 @@ B3BC2CB42B05F4AF00A58477 /* NetworkManager.swift in Sources */, B9A3AE1E2B18CC7900C5FC66 /* RelayAuth.swift in Sources */, 6E53AF4A2AF0126E0022A8F2 /* VibrationManager.swift in Sources */, + B9BB0FE42B459FB100E663F6 /* OpenAITextToSpeech.swift in Sources */, F65054FF2B05269200FFEA07 /* xUIDebounceTests.swift in Sources */, 6E56C7C02B067AF100ED2296 /* AppleSignInManager.swift in Sources */, 6E53AF472AF0121E0022A8F2 /* AudioPlayer.swift in Sources */, diff --git a/voice/voice-ai/x/Actions/AudioPlayer.swift b/voice/voice-ai/x/Actions/AudioPlayer.swift index a2ab47ee9..17f60897d 100644 --- a/voice/voice-ai/x/Actions/AudioPlayer.swift +++ b/voice/voice-ai/x/Actions/AudioPlayer.swift @@ -64,6 +64,19 @@ class AudioPlayer: NSObject { SentrySDK.capture(message: "Error playing sound: \(error.localizedDescription)") } } + + func playSoundTTS(fromData data: Data) { + do { + audioPlayer = try AVAudioPlayer(data: data) + audioPlayer?.prepareToPlay() + audioPlayer?.numberOfLoops = 0 // No looping + audioPlayer?.play() + } catch { + self.logger.log("Error playing sound from data: \(error.localizedDescription)") + SentrySDK.capture(message: "Error playing sound from data: \(error.localizedDescription)") + } + } + func stopSound() { audioPlayer?.stop() diff --git a/voice/voice-ai/x/AppConfiguration/AppConfig.swift b/voice/voice-ai/x/AppConfiguration/AppConfig.swift index 810a74258..058ededf3 100644 --- a/voice/voice-ai/x/AppConfiguration/AppConfig.swift +++ b/voice/voice-ai/x/AppConfiguration/AppConfig.swift @@ -30,7 +30,8 @@ class AppConfig { private var paymentMode: String? var themeName: String? private var mixpanelToken: String? - + private var textToSpeechKey: String? 
+ init(dic: [String: Any]? = nil, relay: RelayAuthProtocol? = nil) { loadConfiguration(dic: dic) @@ -143,6 +144,7 @@ class AppConfig { serverAPIKey = dictionary["SERVER_API_KEY"] as? String paymentMode = (dictionary["PAYMENT_MODE"] as? String) ?? "sandbox" mixpanelToken = (dictionary["MIXPANEL_TOKEN"] as? String) + textToSpeechKey = (dictionary["TEXT_TO_SPEECH_API_KEY"] as? String) // Convert the string values to Int if let eventsString = dictionary["MINIMUM_SIGNIFICANT_EVENTS"] as? String, @@ -301,6 +303,11 @@ class AppConfig { func getMixpanelToken() -> String? { return mixpanelToken } + + func getTextToSpeechKey() -> String? { + return textToSpeechKey + } + } extension AppConfig { diff --git a/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift b/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift new file mode 100644 index 000000000..392c5e10d --- /dev/null +++ b/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift @@ -0,0 +1,56 @@ + +import Foundation +import Sentry + +class OpenAITextToSpeech: NSObject { + func fetchAudioData(text: String, completion: @escaping (Result) -> Void) { + guard let url = URL(string: "https://api.openai.com/v1/audio/speech") else { + completion(.failure(NSError(domain: "OpenAITextToSpeechError", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid URL"]))) + return + } + + guard let apiKey = AppConfig.shared.getTextToSpeechKey() else { + SentrySDK.capture(message: "OpenAI API key is nil") + return + } + + var request = URLRequest(url: url) + request.httpMethod = "POST" + request.addValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") + request.addValue("application/json", forHTTPHeaderField: "Content-Type") + + let body: [String: Any] = [ + "model": "tts-1", + "input": text, + "voice": "nova" + ] + + do { + request.httpBody = try JSONSerialization.data(withJSONObject: body) + } catch { + completion(.failure(error)) + return + } + + URLSession.shared.dataTask(with: request) { data, response, error in + guard let data = data else { + completion(.failure(error ?? NSError(domain: "OpenAITextToSpeechError", code: 0, userInfo: nil))) + return + } + + if let httpResponse = response as? 
HTTPURLResponse, + httpResponse.statusCode == 200, + httpResponse.mimeType == "audio/mpeg" { + completion(.success(data)) + } else { + self.handleError(nil, message: "Received non-audio response or error from API") + } + }.resume() + } + + private func handleError(_ error: Error?, message: String) { + // Implement user-friendly error handling + print(message, error as Any) + // Additional error handling logic can be added here + } +} diff --git a/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift b/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift index 3ea2ea89b..a7c339c36 100644 --- a/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift +++ b/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift @@ -344,7 +344,9 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { timeLogger?.setTTSInit() if !isRepeatingCurrentSession { - textToSpeechConverter.convertTextToSpeech(text: response, timeLogger: timeLogger) + + // textToSpeechConverter.convertTextToSpeech(text: response, timeLogger: timeLogger) + self.textToSpeech(text: response) } completeResponse.append(response) logger.log("[Flush Response] \(response, privacy: .public)") @@ -493,6 +495,18 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { handleQuery(retryCount: maxRetry) } + func textToSpeech(text: String) { + OpenAITextToSpeech().fetchAudioData(text: text) { [weak self] result in + switch result { + case .success(let data): + print("Audio data fetched successfully") + self?.audioPlayer.playSoundTTS(fromData: data) + case .failure(let error): + print("Error fetching audio data: \(error)") + } + } + } + func cancelRetry() { logger.log("[cancelRetry]") retryWorkItem?.cancel() From 7ccab0c44d033b84c82bb31ae0e22eb74e102603 Mon Sep 17 00:00:00 2001 From: Nagesh Kumar Mishra Date: Fri, 5 Jan 2024 21:13:30 +0530 Subject: [PATCH 2/3] Whisper integration for Talk to me with queue. --- voice/voice-ai/x/Actions/AudioPlayer.swift | 31 +++-- .../Twitter/TwitterManager.swift | 19 ++- .../SpeechRecognition/SpeechRecognition.swift | 122 +++++++++++------- 3 files changed, 100 insertions(+), 72 deletions(-) diff --git a/voice/voice-ai/x/Actions/AudioPlayer.swift b/voice/voice-ai/x/Actions/AudioPlayer.swift index 17f60897d..b66ce9b93 100644 --- a/voice/voice-ai/x/Actions/AudioPlayer.swift +++ b/voice/voice-ai/x/Actions/AudioPlayer.swift @@ -29,24 +29,26 @@ class AVAudioSessionWrapper: AVAudioSessionProtocol { } } -class AudioPlayer: NSObject { +class AudioPlayer: NSObject, AVAudioPlayerDelegate { var logger = Logger( subsystem: Bundle.main.bundleIdentifier!, category: String(describing: "AVAudioSessionWrapper") ) var audioPlayer: AVAudioPlayer? var timer: Timer? - + var completion: (() -> Void)? 
+ + func playSound(_ isLoop: Bool = true, _ resource: String = "beep") { playSoundWithSettings(isLoop, resource) } - + func playSoundWithSettings(_ loop: Bool = true, _ resource: String = "beep") { guard let soundURL = Bundle.main.url(forResource: resource, withExtension: "mp3") else { self.logger.log("Sound file not found") - + SentrySDK.capture(message: "Sound file not found") - + return } do { @@ -54,7 +56,7 @@ class AudioPlayer: NSObject { audioPlayer?.prepareToPlay() audioPlayer?.numberOfLoops = 0 // Play once, as the loop will be handled by the Timer audioPlayer?.play() - + // Schedule a Timer to play the sound every 2 seconds if loop { timer = Timer.scheduledTimer(timeInterval: 2, target: self, selector: #selector(playSoundWithDelay), userInfo: nil, repeats: true) @@ -65,28 +67,35 @@ class AudioPlayer: NSObject { } } - func playSoundTTS(fromData data: Data) { + func playSoundTTS(fromData data: Data, completion: @escaping () -> Void) { do { audioPlayer = try AVAudioPlayer(data: data) + audioPlayer?.delegate = self audioPlayer?.prepareToPlay() - audioPlayer?.numberOfLoops = 0 // No looping + audioPlayer?.numberOfLoops = 0 audioPlayer?.play() + self.completion = completion } catch { - self.logger.log("Error playing sound from data: \(error.localizedDescription)") + logger.log("Error playing sound from data: \(error.localizedDescription)") SentrySDK.capture(message: "Error playing sound from data: \(error.localizedDescription)") + completion() // Ensure to call completion even in case of an error } } - + func stopSound() { audioPlayer?.stop() timer?.invalidate() // Stop the timer when stopping the sound } - + @objc func playSoundWithDelay() { if audioPlayer?.isPlaying == false { audioPlayer?.currentTime = 0 audioPlayer?.play() } } + func audioPlayerDidFinishPlaying(_ player: AVAudioPlayer, successfully flag: Bool) { + completion?() + completion = nil + } } diff --git a/voice/voice-ai/x/NetworkManager/Twitter/TwitterManager.swift b/voice/voice-ai/x/NetworkManager/Twitter/TwitterManager.swift index 15902d29d..3a5f9fe91 100644 --- a/voice/voice-ai/x/NetworkManager/Twitter/TwitterManager.swift +++ b/voice/voice-ai/x/NetworkManager/Twitter/TwitterManager.swift @@ -60,12 +60,12 @@ class TwitterManager: ObservableObject { } } - func getAllTwitterListDetails(completion: @escaping (String) -> Void) { + func getAllTwitterListDetails(completion: @escaping ([String]) -> Void) { var details: [String] = [] // Create a DispatchGroup let dispatchGroup = DispatchGroup() - + for list in twitterLists { // Enter the DispatchGroup before starting each asynchronous task dispatchGroup.enter() @@ -73,9 +73,9 @@ class TwitterManager: ObservableObject { TwitterAPI().getTwitterListBy(name: list.name ?? 
"") { result in switch result { case .success(let fetchedLists): - let combinedText = fetchedLists.data.compactMap { $0.text }.joined(separator: "\n") - details.append(combinedText) - + let texts = fetchedLists.data.compactMap { $0.text } + details.append(contentsOf: texts) + case .failure(let error): print("Error fetching lists: \(error)") // Handle the error appropriately @@ -85,14 +85,11 @@ class TwitterManager: ObservableObject { dispatchGroup.leave() } } - + // Notify when all asynchronous tasks are completed dispatchGroup.notify(queue: .main) { - // Combine all the details into a single string - let finalDetails = details.joined(separator: "\n") - - // Call the completion handler with the final string - completion(finalDetails) + // Call the completion handler with the final tweets array + completion(details) } } } diff --git a/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift b/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift index a7c339c36..82399bfd8 100644 --- a/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift +++ b/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift @@ -114,6 +114,8 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { private var startTime: Date? let twitterManager = TwitterManager() + private var queue: [String] = [] + private var isAudioPlaying: Bool = false // MARK: - Initialization and Setup @@ -344,9 +346,8 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { timeLogger?.setTTSInit() if !isRepeatingCurrentSession { - // textToSpeechConverter.convertTextToSpeech(text: response, timeLogger: timeLogger) - self.textToSpeech(text: response) + self.processText(response) } completeResponse.append(response) logger.log("[Flush Response] \(response, privacy: .public)") @@ -495,17 +496,18 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { handleQuery(retryCount: maxRetry) } - func textToSpeech(text: String) { - OpenAITextToSpeech().fetchAudioData(text: text) { [weak self] result in - switch result { - case .success(let data): - print("Audio data fetched successfully") - self?.audioPlayer.playSoundTTS(fromData: data) - case .failure(let error): - print("Error fetching audio data: \(error)") - } - } - } + // Fetches audio data and plays it + private func fetchAndPlayAudio(for text: String, completion: @escaping () -> Void) { + OpenAITextToSpeech().fetchAudioData(text: text) { result in + switch result { + case .success(let data): + self.audioPlayer.playSoundTTS(fromData: data, completion: completion) + case .failure(let error): + print("Error: \(error)") + completion() // Proceed even in case of error + } + } + } func cancelRetry() { logger.log("[cancelRetry]") @@ -555,7 +557,7 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { func reset(feedback: Bool? = true) { logger.log("[reset]") - resetFavoriteKeys() + resetQueue() DispatchQueue.main.async { self.isThinking = false } @@ -921,50 +923,70 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { } func talkToMe() { - resetSkip() -// let followNews = SettingsBundleHelper.getFollowNews().flatMap { -// $0.isEmpty ? SettingsBundleHelper.DefaultFollowNews: $0 -// } ?? 
SettingsBundleHelper.DefaultFollowNews -// -// logger.log("[Data Feed] Populating User Field with \(followNews).") -// DataFeed.shared.getData(followNews: followNews) {data in -// if let data = data { -// SettingsBundleHelper.setUserProfile(profile: data) -// self.logger.log("[Data Feed] Fetched Data: \(data)") -// self.isTalktome = true -// self.currentText = data -// SpeechRecognition.shared.playText(text: data) -// } else { -// print("Failed to fetch or parse data.") -// } -// } - - twitterManager.getAllTwitterListDetails { result in - print("All Twitter list details:\n\(result)") - SpeechRecognition.shared.playText(text: result.isEmpty ? "There is some issue" : result) + resetSkip() + resetQueue() + // let followNews = SettingsBundleHelper.getFollowNews().flatMap { + // $0.isEmpty ? SettingsBundleHelper.DefaultFollowNews: $0 + // } ?? SettingsBundleHelper.DefaultFollowNews + // + // logger.log("[Data Feed] Populating User Field with \(followNews).") + // DataFeed.shared.getData(followNews: followNews) {data in + // if let data = data { + // SettingsBundleHelper.setUserProfile(profile: data) + // self.logger.log("[Data Feed] Fetched Data: \(data)") + // self.isTalktome = true + // self.currentText = data + // SpeechRecognition.shared.playText(text: data) + // } else { + // print("Failed to fetch or parse data.") + // } + // } + twitterManager.getAllTwitterListDetails {tweets in + print("All Twitter list details:\n\(tweets)") + self.startWithTweets(tweets) } } - func concatenateFavoriteValues() -> String { - let userDefaults = UserDefaults.standard - let keys = userDefaults.dictionaryRepresentation().keys - let favoriteKeys = keys.filter { $0.hasPrefix("combinedText_") } - let favoriteValues = favoriteKeys.compactMap { key -> String? in - userDefaults.string(forKey: key) + // Method to process a single string + func processText(_ text: String) { + enqueueText(text) + } + + func startWithTweets(_ tweets: [String]) { + resetQueue() + tweets.forEach { enqueueText($0) } + } + + private func enqueueText(_ text: String) { + if !text.isEmpty { + queue.append(text) + playNextItemIfPossible() } - let combinedString = favoriteValues.joined(separator: "\n") - return combinedString } - func resetFavoriteKeys() { - let userDefaults = UserDefaults.standard - let keys = userDefaults.dictionaryRepresentation().keys - let favoriteKeys = keys.filter { $0.hasPrefix("combinedText_") } - - for key in favoriteKeys { - userDefaults.removeObject(forKey: key) + private func playNextItemIfPossible() { + guard !isAudioPlaying, !queue.isEmpty else { return } + isAudioPlaying = true + let textToSpeak = queue.removeFirst() + OpenAITextToSpeech().fetchAudioData(text: textToSpeak) { [weak self] result in + switch result { + case .success(let data): + self?.audioPlayer.playSoundTTS(fromData: data) { + self?.isAudioPlaying = false + self?.playNextItemIfPossible() // Continue with next item + } + case .failure(let error): + print("Error fetching audio data: \(error)") + self?.isAudioPlaying = false + self?.playNextItemIfPossible() // Proceed to next item even in case of error + } } } + + private func resetQueue() { + queue.removeAll() + isAudioPlaying = false + } func playText(text: String) { self.logger.log("[Play Text]") From 1b66cefd539d27fc215bd63c013004045e8fd3d6 Mon Sep 17 00:00:00 2001 From: Nagesh Kumar Mishra Date: Mon, 8 Jan 2024 18:30:41 +0530 Subject: [PATCH 3/3] Added MP3 greeting message functionality and cancel method for OpenAITextToSpeech. Removed the download message for premium voice. 
Updated the UI for pause/play feature. --- .../Voice AI.xcodeproj/project.pbxproj | 6 ++ voice/voice-ai/x/Actions/AudioPlayer.swift | 2 - .../x/Converter/TextToSpeechConverter.swift | 28 ------ voice/voice-ai/x/Resources/Hey.mp3 | Bin 0 -> 7680 bytes .../OpenAITextToSpeech.swift | 15 ++- .../SpeechRecognition/SpeechRecognition.swift | 91 ++++++++++-------- 6 files changed, 68 insertions(+), 74 deletions(-) create mode 100644 voice/voice-ai/x/Resources/Hey.mp3 diff --git a/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj b/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj index f84c093fe..e86785d23 100644 --- a/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj +++ b/voice/voice-ai/Voice AI.xcodeproj/project.pbxproj @@ -204,6 +204,8 @@ B97E04B72B3C14BC000C9FCA /* TwitterManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = B97E04B52B3C14BC000C9FCA /* TwitterManager.swift */; }; B97E04B82B3C14BC000C9FCA /* TwitterManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = B97E04B52B3C14BC000C9FCA /* TwitterManager.swift */; }; B97E04B92B3C14BC000C9FCA /* TwitterManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = B97E04B52B3C14BC000C9FCA /* TwitterManager.swift */; }; + B98288E92B4C132C00AA43C4 /* Hey.mp3 in Resources */ = {isa = PBXBuildFile; fileRef = B98288E82B4C132C00AA43C4 /* Hey.mp3 */; }; + B98288EA2B4C132C00AA43C4 /* Hey.mp3 in Resources */ = {isa = PBXBuildFile; fileRef = B98288E82B4C132C00AA43C4 /* Hey.mp3 */; }; B9A3ADEE2B15CCCF00C5FC66 /* ShareLinkTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9A3ADED2B15CCCE00C5FC66 /* ShareLinkTests.swift */; }; B9A3ADF02B15CD3500C5FC66 /* ActivityViewTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9A3ADEF2B15CD3500C5FC66 /* ActivityViewTests.swift */; }; B9A3AE182B18CC7900C5FC66 /* TimerManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = B9A3AE102B18CC7900C5FC66 /* TimerManager.swift */; }; @@ -465,6 +467,7 @@ B97E04AD2B358175000C9FCA /* TwitterAPI.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TwitterAPI.swift; sourceTree = ""; }; B97E04B32B3582F0000C9FCA /* TwitterModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TwitterModel.swift; sourceTree = ""; }; B97E04B52B3C14BC000C9FCA /* TwitterManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TwitterManager.swift; sourceTree = ""; }; + B98288E82B4C132C00AA43C4 /* Hey.mp3 */ = {isa = PBXFileReference; lastKnownFileType = audio.mp3; path = Hey.mp3; sourceTree = ""; }; B9A3ADED2B15CCCE00C5FC66 /* ShareLinkTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ShareLinkTests.swift; sourceTree = ""; }; B9A3ADEF2B15CD3500C5FC66 /* ActivityViewTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ActivityViewTests.swift; sourceTree = ""; }; B9A3AE102B18CC7900C5FC66 /* TimerManager.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TimerManager.swift; sourceTree = ""; }; @@ -742,6 +745,7 @@ B930AACE2ADD4BAF009F9F8C /* Resources */ = { isa = PBXGroup; children = ( + B98288E82B4C132C00AA43C4 /* Hey.mp3 */, A4680D9D2AE9C8A600F5F8ED /* beep.mp3 */, B930AACF2ADD4BAF009F9F8C /* Font */, ); @@ -1244,6 +1248,7 @@ buildActionMask = 2147483647; files = ( A44CFFB12B0D7200003D6822 /* AppConfig.plist in Resources */, + B98288EA2B4C132C00AA43C4 /* Hey.mp3 in Resources */, B3E8D1552B201E450041B82B /* Localizable.xcstrings in Resources */, 
E47F1A992B07F94F00455617 /* Dotrice-Regular.otf in Resources */, F67EDE382B07C67400FDEA80 /* logo.png in Resources */, @@ -1286,6 +1291,7 @@ files = ( CD0D13422ADA73B400031EDD /* Preview Assets.xcassets in Resources */, 22A9883B2AF8FE0D00A32B1A /* SyncedProducts.storekit in Resources */, + B98288E92B4C132C00AA43C4 /* Hey.mp3 in Resources */, A44CFFB02B0D7200003D6822 /* AppConfig.plist in Resources */, CD0D133E2ADA73B400031EDD /* Assets.xcassets in Resources */, B9AD506D2ADFEFDB006F18A1 /* Dotrice-Bold-Expanded.otf in Resources */, diff --git a/voice/voice-ai/x/Actions/AudioPlayer.swift b/voice/voice-ai/x/Actions/AudioPlayer.swift index b66ce9b93..c6811dae1 100644 --- a/voice/voice-ai/x/Actions/AudioPlayer.swift +++ b/voice/voice-ai/x/Actions/AudioPlayer.swift @@ -38,7 +38,6 @@ class AudioPlayer: NSObject, AVAudioPlayerDelegate { var timer: Timer? var completion: (() -> Void)? - func playSound(_ isLoop: Bool = true, _ resource: String = "beep") { playSoundWithSettings(isLoop, resource) } @@ -82,7 +81,6 @@ class AudioPlayer: NSObject, AVAudioPlayerDelegate { } } - func stopSound() { audioPlayer?.stop() timer?.invalidate() // Stop the timer when stopping the sound diff --git a/voice/voice-ai/x/Converter/TextToSpeechConverter.swift b/voice/voice-ai/x/Converter/TextToSpeechConverter.swift index 64235135d..78adbaebc 100644 --- a/voice/voice-ai/x/Converter/TextToSpeechConverter.swift +++ b/voice/voice-ai/x/Converter/TextToSpeechConverter.swift @@ -24,7 +24,6 @@ class TextToSpeechConverter: NSObject, TextToSpeechConverterProtocol { var isSpeaking: Bool { return synthesizer.isSpeaking } - var showDownloadVoicePromptCalled: Bool = false private(set) var isDefaultVoiceUsed = false let alertManager = AlertManager(viewControllerProvider: { @@ -120,33 +119,6 @@ class TextToSpeechConverter: NSObject, TextToSpeechConverterProtocol { synthesizer.continueSpeaking() } } - - func isPremiumOrEnhancedVoice(voiceIdentifier: String) -> Bool { - let lowercasedIdentifier = voiceIdentifier.lowercased() - return lowercasedIdentifier.contains("premium") - } - - func checkAndPromptForPremiumVoice(voiceIdentifier: String? = nil) { - guard let currentVoiceIdentifier = voiceIdentifier ?? AVSpeechSynthesisVoice(language: getLanguageCode())?.identifier else { - return - } - - print("currentVoice: \(currentVoiceIdentifier)") - print("Is the voice premium? \(isPremiumOrEnhancedVoice(voiceIdentifier: currentVoiceIdentifier))") - - if !isPremiumOrEnhancedVoice(voiceIdentifier: currentVoiceIdentifier) { - showDownloadVoicePrompt() - } - } - - func showDownloadVoicePrompt() { - // The prompt should guide the user on how to download a premium voice - DispatchQueue.main.asyncAfter(deadline: .now() + 10.0) { - let okAction = UIAlertAction(title: String(localized: "button.ok"), style: .default) - self.alertManager.showAlertForSettings(title: "Enhance Your Experience", message: "Download a premium voice for a better experience. 
Go to Settings > Accessibility > Spoken Content > Voices to choose and download a premium voice.", actions: [okAction]) - } - showDownloadVoicePromptCalled = true - } } extension TextToSpeechConverter: AVSpeechSynthesizerDelegate {} diff --git a/voice/voice-ai/x/Resources/Hey.mp3 b/voice/voice-ai/x/Resources/Hey.mp3 new file mode 100644 index 0000000000000000000000000000000000000000..f5656acc70a6b56187c3764cb134b486f57012e5 GIT binary patch literal 7680 zcmdtl`#;m|{{Zm!4rZHS%wa>?kVfb|Mu-|lbE-KXLYq?*nn-nbcegnXQ*($BeMYz) z%PG=v%sGT8qHa<`DtC0ctGjP~ACK=3-ygpJ!1wy?`r*1B*W>lPUa!}6{n&WX14L9> z!(jnq0II!)koeZLa94$u&MpxZVb3=x1$&h7bPYMp{hFhivYJMv=9lmM=XTkPWb^929S@^?7^Dh?W_hC)HA(5`mEzH1O<`{I&7B*+?OuK_i`#vpAUO6|u>Y=BnIW+kIj2 z^O}_c-7~h-y#IJW#$(Qu)p0pVRwlnzJ(6dI`T7Z8DR9!oN`EfE zxR_yAQ{_xz?J_xuqVucJ%z>7ktKFpJE$Z z+mjwY{c_Ii4TIfa(noyjus+-VvNP^r;iH)+mEKvop%lc8f1Pv{zwOmL_q4;f?CQt6 zkF_#L-wHiF_hdJ#jtARKUn=}@@7>qJTR#tW^t{98fA?NkEd05}*B^>~i^o6gQcaOp zG>7Ph=!0qwx`3GqHNo`x;#Nf~I-8ZEiKckwHJf>;nWeD1z0|E#tF0y;swnP3T+x=b zG~)gE>1&f@V@-x!ng-OOg2nuB2GO3*f-d=6MwBB@fX}560CNg#6XG%fqQAGLsCTg6 zw>SX6zPD>;Xm}9Xv@2HF$R{{YrPg7Rr^5X*gDf==gZZF+31|963037`L@Tqd;Z!rDSXX;W_=;io+nM&`tnqRH)etE z0u=xAXIxA`^3B+@^`?%a1@E*oM7mp<~jJ33}E^_>SPA&Hs z#NI#!B>apOON|WC1thXsa0i%hKPd3_G9O2SUNAE#mI!WzzUP|3fYVvI6@;BorDUNI zlKaco;zXsj4dmH0@L4h^D?r+_q%ZixT~2M?Wa~Z}#{}Yp5aW%|PEcCgu}X0yRS&gQ zZIVc{EO;=s;%K)rqr?O7sM?GC!kj#HzBkemIdZ`D_wdD*d<9w4bzO#v_71CUK&CKd zQ6?OGpFj;U-L|N0f!U{rHOI9Q6)B!H>|kk%C{QUKCGwwkXf6f`I)h9t%xIMU!YGfE z;w1-BMHebz0YS1BT<--hodK_y=VF<8K``E+6NU+5b&%SsNIxCPJTFWO?rD(%eFlGI zpB{Msa@&7c`%8`@d7A#wccKTY`=_&-+XZ$#;&Vm;y^H7Gx$5Md4JtS?1@C3_y>``K zuF@tEbXv{=?rN!lfy=cl0*cG{(D;C@*pRB0&^HQ}w!SmY96E zyh0N9+imyW32%s+oU{34M^Hr2WWKvhVaxhqIkFaVVscB2^{TmQa8AUoOQEmS&oE@qN}C@M7j_r7mk70Z7Rbekw8u8>URALT^N>*Y_%*+ z>zA)-LV>jn@QFHS1iioB#r)0fPFd>x7xof5k7&s>LgKPe0*INyE3s9@UL=+EHzZXT zAP#L8!VWD*Hx`R3u{#g&34nYMnqm$ZjZ}!b_uzk5V~x>N<4&?WbNlson&Y+f{1v!# zr?alwNr6XnXnI^0XC@7rj@4AcTyT&xs5lsS1O;q4Y+pToF?IE zmd~G4(TNfL9kzz&uDG8#`0%n$b#&;v0iXRUT99WKtEJ-WJTU`CJbL%e)IW67&!xuN z`Nf)JQz$+nXS;)-T1@rGHC4W>CM(byOc=QMLnO^9C!k&wP2}>9)CtlYO35 zg5b2?Bditvr3IQYmN!syl3=RQd=bgQLwV#Th)5F{p4GkjFeZ;i!!5#%v7JPO*y zkTHoa&sD;fMmdiqVojN`kw#8{dRF0Zvrol=6C2(*Gcc<`SuOrkpYU zdA1!-)a6ux7geSn1_$%H^z1Jri zFu+cb7CfZ^_daUaX~wL&aJ_0ygq?`o4(smlxl`Wsc#7ww&5(Z}+^dl0-+0o~;J`-v zvA2Gq|Nh(Yu03|a?MHByE$9Bl1B(0De1u}8Nq3mUkoB#1kSAAMqcd^HlA#;o;+|KZ z5kZv$PJPvlaCtVZZl;{qBhqw@oqhQoE{)w_2>P~srkm-XyL_sCc_w(q@yMwx0X=BD zarEvuxx9CtheBOG#_f4E#d+*1c|Q=mJ=4wa%0B}Y4X1~a0^Q=hIxMiWS3Ab97$*Oj zb^DqvlvQ;Q7Z-2~x<*$QXF23KGOJDFD}|w?`1w(*;0AB?S&By`MGME*?QFiEXah(7chk zBGq&z+;hL=DYm6c+egf4Chz0ek*>gz97Nr3hnMKCuVg^>fn*3SGuPanq*+{@#itGP z3I^mQp88IVFnT&!SwYS0IE|*nFErCx!IM3*&;$rMN}fzHuz(NjR}oS~S6r^&&%hCI z*{OF*A8ij)Tb!6l$Ux>j*nRC~=WGIYc6F?#47qW(W>f(eezG*Js_`NFY>DjkdN5=o~@RxuxZrWX1Js>C_ z=vp(if$z%g8K@q6N4>q#qc?oTyT7VU+Dw1^#i(x=at0UG@5zxe3kK-;`(cUwt@uyg z3h+s(6f&e=M-G$x@?|ClS<|4Ol0e!^E`*#~)5jZ*qS;45^}%&r9CC{c%_=6FkA{M{& zL13}v@j{Ke*`L4Qkvn;ua>FTPX(RBXu<>ZT(75l0E;{)ABxhY}%|G+O3Z6 za-9xMxTh2oCKSDK*4q-_&@omxztT6=l9Ol8Dm-;*xLQ^F>Prft)*<9*n zhqwv18rOSsD0c8m=nkt83uUgQQbMh_lq+F>ym_cyEpuchYWI{$4HGS6KiM<)W)w17 z)31jUsHVIa7_lAUKVtTYIDNDF6ULjm^tGWmfG=iPto zeD#qfx?8E;-Wo>bF~i}%d_9iAt*OAr&C*Pto@`n&n^n=Yp&|!j%NO+epjA41l6%Z|9LusF^gD%u+E6xbB*_CI1Jwr(y ziPvG2_rY&C0gILNxeuk6a-PD^W6}%#wsyN4*L|lij=lbYF%E~e%Up-m-qdw`Y%{^? 
zd^^ehK4k9v$D7p+^ABdB+e{iu*J>Xf-FY7fSzUFx+>jYvn>A=Weq!7ztx7JC9aGTh z1lM&9G(G`wrj}MUAMoyFzHI4Lkr8)#PHSjJ8b;P0o{(U@5AoR`hy1sv7T*{#X{1;VvdGTi~$(F()^u%QB zA0!QjtBD{4D#!I98?4&tY9@>4ImO*kv{?s8$=(b-LZ)w&qE?fWRm>QjF)z>@hp`sk+w^}MOW%ZBIfdmkVKck&?T+oUn#aD?|<^8;&yWL7?F_!nK?UH3)+-Oqxl!c7N+*Uy5N# z@)@{PT^l*&S7-X;#6LNny>a^;62MrSr>A4SXZvi|$(z0Kbg+8udk2ADy`^Y>?_1B+ zOYum3cA9@dUnOVF<7K3KSq63a$kvwmQF`rHbIm`(rWTZ5gI`}O^!(-LX{O&)aSfvm zv{omM75}p$^mK@q)qo|f9BuXu!~0K>K4MxmKS%#bukp_gVEETv-0|4=?0V$LGki|D z`RVrNZO`vVE#pmB;cx>UgMpV0qCTl);msg}ylK{@IZNj<+7nT|pjfy(B)W;pOcBxX zI;n&lTAWBDx$9o!QIec06|-M;BXI6Prdfq#rq@M~1yA;VXQo_m_0QZ+;~IU-TLl|R zN3muz3J$^zLc`M4Q}@|+J}=tFJFPN+a2^cc|ww;G*MN z{8s->^N5cL5$h&3FCY0G&1x`wDK7eOvZbo>@{t*v&T5gje8dg0MeYm|kjj16E+ACh z0-*v}HZxF)2bf*qqPNVh0g9W*c+@5fGx82!rDV3IL zM+@dQnc}fL8d+@^4ZP?AaBe8>3+ep4sbs(iXJ+6g%A{%vCl`sFMA}vg<1}`rD^|VL zCJ6@H_RiskKa>Os@@LMoiNGnCqTake0*x>Uv?wy26p^?zW&j!9W<-I6Qm*_XBagQ< z3<<;JO6Ww^T+a$Ds-I_qi!(ZZd&Al@ybB65&lOmcs7k*d5wSTQ1*xy>`P~UO=GuUu z{s<(wOJvXXCoRP1y|KbTn5GNIGT^-8ZgQp@WV3sdGH4(j+p8ct6+q!yh5C!DSXc=F$5t z_CH)6DpxeJDH^X?JFK=kT1>@JAC*jWfRX1Y#vK2&n%nSqyN`B0;kV5l?0;1_MZJbw zGz>ipUWS^{E9Ok|Wx`;rc4dtXRFf&_32puP@(1JLDYC*y&=%?d8&lSRk%&g1EKJPZ zLur+kPf|Z6+_a@)0boVx^=<`%imD7C55e$@B$lznR4%Rec|RLbK}k_p1KD9PWzdcT zpkkBD?RsN6BXiIq&?Q;KOfUvpNMp?&B=*VQZ;KbYlSyNX+P0t=rvlM%)Pm`6F;xl6 zX|R0;sm1wFx16v><7yf$D<3L?%KITQIy`alyz3sqjaN`veWKA!TyXk}1(beJzP_|g zw_=PxEcU`{MK{!$%fR_C7gao~`fqaZ$s;n%fP=Pbr1W3DKTfP!%>y6b-);6C2k+$JMd9LnO)InR`uIp z=+Ct6K2S|_M;&*MK#hJ&8va7yLS|AbSPN)`j$5ar3T0T{TZ$K9sP`7M{76DfSigwm z@Dk$;sSOX6Tya}y@=2%keWlBlW#@{S#f8q?&%3=Gg=opKw31y<5lb;cJFGaqoJfa# z2tY-p8)TSz_Sc&VshXx^@aI22a_LBJe)2*#gDu36(qtne^jxD^Imrtn(XKd~NRaOh35=KnZk2-dpQ&W%E^4dWofD-#LTeL=?IYtzu3J(S^ z-WCt)1V=o|eC=^{2kMVyF5_m+DPGk1kji0`jcU>G$JnKeJj*sWj#CNlY5H=9SL2y_ zIcpawfQ@3U_7^$Qy;V?IZH2%mlU7Hig&vq*D4{8BaXB3M#HsvQwRY>1LhokCpsr=B!3ZksT;i+F0*~bT2 zc^dPStFY4nQ=l_^(pX@Q9rl!S^d?{p*ZVJn&ThNyaPMmu0|eb@iohvUgKM%54ptgQ z*OXQ0H*zfTxj1w9s0OAwJWUhq!$TJ1X{=lUytfKl$U%^6I`vn9uAqWHQVMcYQ5V>| z1~Bl#6NaO~l=o^6*kY3pYF!%I90S7(%TB zZabe&WoacS>H{hwxYL0I(5h3)SlVSR;qKV(k&iz12h1U0K8rQAuBR{DlgqnR2CO;sY^UfZ)Ud%fW zgPM+Mkd07D_T{>`Z7kZDfP-={OGBWsrXe#C^_b86(HDubeJeP?>O(w^`B{P`tZRtAPBd3E0|(OFQ~V@ z_|>-KNAYeYeZ|9pVDApE)i)OOQmC) zLNzn&B3$_`sThEzov*PL2j#6BoQamNWimN8q0}^2G!R0ix|nVjp|%{v_QHg=Q03`5EqbkF{g8*w4m}h;bnr-GpZ`O`LB^_!5(|FD zv-fVR=Z#zC9te~XI+=dXFFS`(6a`pP=WoKQ_*|zCTp?|-j(PWH9dRb5O+c%K`AhaoIWDUD_v=u-pwq9TZCDo tBQdIFrq$1%UaId|uIDhp+GG_OXJdQ0s_ryW9jd_h|M2}^*#D0w{{@`!APWEh literal 0 HcmV?d00001 diff --git a/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift b/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift index 392c5e10d..b11fad503 100644 --- a/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift +++ b/voice/voice-ai/x/SpeechRecognition/OpenAITextToSpeech.swift @@ -2,7 +2,10 @@ import Foundation import Sentry -class OpenAITextToSpeech: NSObject { +class OpenAITextToSpeech: NSObject { + + var currentDataTask: URLSessionDataTask? + func fetchAudioData(text: String, completion: @escaping (Result) -> Void) { guard let url = URL(string: "https://api.openai.com/v1/audio/speech") else { completion(.failure(NSError(domain: "OpenAITextToSpeechError", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid URL"]))) @@ -32,12 +35,11 @@ class OpenAITextToSpeech: NSObject { return } - URLSession.shared.dataTask(with: request) { data, response, error in + currentDataTask = URLSession.shared.dataTask(with: request) { data, response, error in guard let data = data else { completion(.failure(error ?? 
NSError(domain: "OpenAITextToSpeechError", code: 0, userInfo: nil))) return } - if let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200, httpResponse.mimeType == "audio/mpeg" { @@ -45,9 +47,14 @@ class OpenAITextToSpeech: NSObject { } else { self.handleError(nil, message: "Received non-audio response or error from API") } - }.resume() + } + currentDataTask?.resume() } + func cancelAudioDataFetch() { + currentDataTask?.cancel() + currentDataTask = nil + } private func handleError(_ error: Error?, message: String) { // Implement user-friendly error handling print(message, error as Any) diff --git a/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift b/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift index 82399bfd8..33c493160 100644 --- a/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift +++ b/voice/voice-ai/x/SpeechRecognition/SpeechRecognition.swift @@ -116,6 +116,7 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { let twitterManager = TwitterManager() private var queue: [String] = [] private var isAudioPlaying: Bool = false + private let openAITextToSpeech = OpenAITextToSpeech() // MARK: - Initialization and Setup @@ -134,13 +135,12 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { func setup() { checkPermissionsAndSetupAudio() - textToSpeechConverter.convertTextToSpeech(text: greetingText, timeLogger: nil) + // textToSpeechConverter.convertTextToSpeech(text: greetingText, timeLogger: nil) + // self.processText(greetingText) + self.greetingMessage() isCapturing = true // startSpeechRecognition() setupTimer() - - // TODO: Place this method at right place - textToSpeechConverter.checkAndPromptForPremiumVoice() } private func setupTimer() { @@ -496,19 +496,6 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { handleQuery(retryCount: maxRetry) } - // Fetches audio data and plays it - private func fetchAndPlayAudio(for text: String, completion: @escaping () -> Void) { - OpenAITextToSpeech().fetchAudioData(text: text) { result in - switch result { - case .success(let data): - self.audioPlayer.playSoundTTS(fromData: data, completion: completion) - case .failure(let error): - print("Error: \(error)") - completion() // Proceed even in case of error - } - } - } - func cancelRetry() { logger.log("[cancelRetry]") retryWorkItem?.cancel() @@ -589,7 +576,8 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { DispatchQueue.main.async { if feedback == true { // Play the greeting text - self.textToSpeechConverter.convertTextToSpeech(text: self.greetingText, timeLogger: nil) + // self.textToSpeechConverter.convertTextToSpeech(text: self.greetingText, timeLogger: nil) + self.greetingMessage() DispatchQueue.main.asyncAfter(deadline: .now() + 1) { ReviewRequester.shared.logSignificantEvent() } @@ -597,6 +585,14 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { } } } + + func greetingMessage() { + self.synthesizeStart() + self.audioPlayer.playSound(false, self.greetingText) + DispatchQueue.main.asyncAfter(deadline: .now() + 0.10) { + self.synthesizeFinish() + } + } func checkContextChange() -> Bool { if conversation.isEmpty { @@ -796,6 +792,7 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { func speak() { timeLogger = TimeLogger(vendor: "openai", endpoint: "completion") timeLogger?.setAppRec() + resetQueue() DispatchQueue.main.async { self.isThinking = false } @@ 
-942,7 +939,7 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { // } // } twitterManager.getAllTwitterListDetails {tweets in - print("All Twitter list details:\n\(tweets)") + print("All Twitter list count: \(tweets.count)") self.startWithTweets(tweets) } } @@ -967,25 +964,32 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { private func playNextItemIfPossible() { guard !isAudioPlaying, !queue.isEmpty else { return } isAudioPlaying = true + let textToSpeak = queue.removeFirst() - OpenAITextToSpeech().fetchAudioData(text: textToSpeak) { [weak self] result in + openAITextToSpeech.fetchAudioData(text: textToSpeak) { [weak self] result in switch result { case .success(let data): + self?.synthesizeStart() self?.audioPlayer.playSoundTTS(fromData: data) { self?.isAudioPlaying = false + self?.synthesizeFinish() self?.playNextItemIfPossible() // Continue with next item } case .failure(let error): print("Error fetching audio data: \(error)") self?.isAudioPlaying = false + self?.synthesizeFinish() self?.playNextItemIfPossible() // Proceed to next item even in case of error } } } private func resetQueue() { + audioPlayer.stopSound() + openAITextToSpeech.cancelAudioDataFetch() queue.removeAll() isAudioPlaying = false + synthesizeFinish() } func playText(text: String) { @@ -1072,17 +1076,21 @@ class SpeechRecognition: NSObject, ObservableObject, SpeechRecognitionProtocol { self.textToSpeechConverter.convertTextToSpeech(text: self.limitReachedText, timeLogger: nil) } } -} - -// Extension for AVSpeechSynthesizerDelegate - -extension SpeechRecognition: AVSpeechSynthesizerDelegate { - func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) { - - if totalWordsToSkip > 0 { - applySkip() + + func synthesizeStart() { + isPlayingWorkItem?.cancel() + isPlayingWorkItem = DispatchWorkItem { [weak self] in + if self?._isPlaying == false { + self?.logger.log("[synthesizeStart]") + DispatchQueue.main.async { + self?._isPlaying = true + } + } } - + DispatchQueue.main.asyncAfter(deadline: .now() + 0.1, execute: isPlayingWorkItem!) + } + + func synthesizeFinish() { isPlayingWorkItem?.cancel() isPlayingWorkItem = DispatchWorkItem { [weak self] in if (self?._isPlaying) != nil { @@ -1093,6 +1101,18 @@ extension SpeechRecognition: AVSpeechSynthesizerDelegate { } } DispatchQueue.main.asyncAfter(deadline: .now() + 0.35, execute: isPlayingWorkItem!) + } +} + +// Extension for AVSpeechSynthesizerDelegate + +extension SpeechRecognition: AVSpeechSynthesizerDelegate { + func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) { + + if totalWordsToSkip > 0 { + applySkip() + } + synthesizeFinish() // TODO: to be used later for automatically resuming capturing when agent is not speaking // resumeListeningTimer = Timer.scheduledTimer(withTimeInterval: 0.5, repeats: false) { _ in @@ -1107,16 +1127,7 @@ extension SpeechRecognition: AVSpeechSynthesizerDelegate { } func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) { - isPlayingWorkItem?.cancel() - isPlayingWorkItem = DispatchWorkItem { [weak self] in - if self?._isPlaying == false { - self?.logger.log("[synthesizeStart]") - DispatchQueue.main.async { - self?._isPlaying = true - } - } - } - DispatchQueue.main.asyncAfter(deadline: .now() + 0.1, execute: isPlayingWorkItem!) + self.synthesizeStart() audioPlayer.stopSound() pauseCapturing()
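
Usage sketch (illustrative only, not part of the patches above): the snippet below shows how the TTS pipeline introduced here might be exercised directly, assuming the `OpenAITextToSpeech` and `AudioPlayer` types as they stand after patches 2-3 (completion-based `playSoundTTS(fromData:completion:)`, cancellable `fetchAudioData(text:completion:)`), that the completion delivers a `Result<Data, Error>`, and that `TEXT_TO_SPEECH_API_KEY` is set in `AppConfig.plist` so `getTextToSpeechKey()` returns a key.

    import Foundation

    // Illustrative driver for the new TTS path, outside of SpeechRecognition's queue.
    let tts = OpenAITextToSpeech()
    let player = AudioPlayer()

    tts.fetchAudioData(text: "Hello from the OpenAI text-to-speech endpoint") { result in
        switch result {
        case .success(let data):
            // The endpoint returns audio/mpeg bytes; AVAudioPlayer decodes them directly.
            player.playSoundTTS(fromData: data) {
                print("Playback finished")
            }
        case .failure(let error):
            print("TTS request failed: \(error)")
        }
    }

    // A later user action (for example, reset()) can abort an in-flight request:
    tts.cancelAudioDataFetch()

Note that, per the diff, `fetchAudioData` returns without invoking its completion when the API key is missing or when a non-audio response arrives (it only logs via `handleError`), so callers relying on the completion to advance a queue would stall in those cases.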