diff --git a/ios/voicebox/Constants.h b/ios/voicebox/Constants.h
index 97c29af..126bc88 100644
--- a/ios/voicebox/Constants.h
+++ b/ios/voicebox/Constants.h
@@ -16,6 +16,7 @@
 #define ACTION_BUTTON_UICOLOR [UIColor colorWithRed:0.4 green:0.502 blue:0.694 alpha:1.0]
 #define ACTION_BUTTON_HIGHLIGHT_UICOLOR [UIColor colorWithRed:0.32 green:0.402 blue:0.555 alpha:1.0]
 #define ACTION_BUTTON_DISABLED_UICOLOR [[UIColor systemGray4Color] colorWithAlphaComponent:0.6]
+#define KEYBOARD_BUTTON_UICOLOR [UIColor colorWithRed:0.6 green:0.702 blue:0.894 alpha:1.0]
 
 #define OPEN_AI_API_TIMEOUT_SECONDS 12.0
 
diff --git a/ios/voicebox/UI/ListenViewController.m b/ios/voicebox/UI/ListenViewController.m
index 30d0d31..7a85118 100644
--- a/ios/voicebox/UI/ListenViewController.m
+++ b/ios/voicebox/UI/ListenViewController.m
@@ -15,6 +15,7 @@ @interface ListenViewController ()
 @property (nonatomic, weak) UILabel *loadingLabel, *transcriptLabel;
 @property (nonatomic, weak) UIActivityIndicatorView* spinner;
 @property (nonatomic, weak) UIButton* closeBtn;
+@property (nonatomic, copy) NSArray* priorSegments;
 
 @end
 
@@ -41,7 +42,7 @@ - (void)viewDidLoad
 
     UILabel* transcriptLabel = [[UILabel alloc] init];
     transcriptLabel.text = @"";
-    transcriptLabel.font = [UIFont systemFontOfSize:MAX(22.0, [UIFont labelFontSize])];
+    transcriptLabel.font = [UIFont systemFontOfSize:MAX(16.0, [UIFont labelFontSize])];
     transcriptLabel.textColor = [UIColor systemGrayColor];
     transcriptLabel.translatesAutoresizingMaskIntoConstraints = NO;
     transcriptLabel.lineBreakMode = NSLineBreakByWordWrapping;
@@ -119,11 +120,52 @@ - (void)stateUpdate:(bool)running segments:(NSArray*)segments
         weakself.loadingLabel.text = @"Listening...";
 
         if (segments) {
-            NSString* transcript = @"";
-            for (NSString* segment in segments) {
-                transcript = [transcript stringByAppendingString:segment];
+            NSString* liveContent = @"";
+            NSString* readyToProcess = @"";
+            /*
+             NSString* transcript = @"";
+             for (NSString* segment in segments) {
+                 NSString* segmentString = [NSString stringWithFormat:@" - %@\n", segment];
+                 transcript = [transcript stringByAppendingString:segmentString];
+             }*/
+            // weakself.transcriptLabel.text = transcript;
+
+            NSArray* priorSegments = weakself.priorSegments;
+            if (priorSegments) {
+                // Log any change among the segments treated as stable (all but the last two).
+                // Also bound the loop by the current count: nothing guarantees the new array is
+                // at least as long as the prior one, and indexing past its end would throw.
+                for (int i = 0; i < (int)priorSegments.count - 2 && i < (int)segments.count; i++) {
+                    NSString* priorSegment = priorSegments[i];
+                    NSString* curSegment = segments[i];
+                    if (![priorSegment isEqualToString:curSegment]) {
+                        /* Findings:
+                         - very often changing last segment, it's the "in progress" segment.
+                         - pretty often changing n-1 segment -- not stable
+                         - rarely but sometimes changes n-2 segment. Doesn't seem to be any stability guarantee. Changes I've seen:
+                         - correct a word
+                         - fix punctuation
+                         - move word from one segment to another
+                         */
+                        NSLog(@"segment changed!\nPrior: %@\nCurrent: %@", priorSegment, curSegment);
+                    }
+                }
             }
-            weakself.transcriptLabel.text = transcript;
+
+            for (int i = 0; i < (int)segments.count; i++) {
+                NSString* segment = segments[i];
+
+                if (priorSegments && i < (int)priorSegments.count - 2) {
+                    // stable enough, ready to process this
+                    readyToProcess = [readyToProcess stringByAppendingString:segment];
+                } else {
+                    liveContent = [liveContent stringByAppendingString:segment];
+                }
+            }
+
+            weakself.transcriptLabel.text = [NSString stringWithFormat:@"Ready to Process:\n%@\n\nLive:\n%@", readyToProcess, liveContent];
+
+            weakself.priorSegments = segments;
         }
     });
 }
diff --git a/ios/voicebox/UI/VBButton.h b/ios/voicebox/UI/VBButton.h
index db5fbff..b384ab8 100644
--- a/ios/voicebox/UI/VBButton.h
+++ b/ios/voicebox/UI/VBButton.h
@@ -15,6 +15,7 @@ NS_ASSUME_NONNULL_BEGIN
 
 - (instancetype)initOptionButtonWithTitle:(NSString*)title;
 - (instancetype)initOptionCancelButton;
+- (instancetype)initKeyboardButton;
 
 @end
 
diff --git a/ios/voicebox/UI/VBButton.m b/ios/voicebox/UI/VBButton.m
index f3adf7e..ac73607 100644
--- a/ios/voicebox/UI/VBButton.m
+++ b/ios/voicebox/UI/VBButton.m
@@ -62,6 +62,22 @@ - (instancetype)initOptionCancelButton
     return self;
 }
 
+// Initializes a button from the gray button configuration with an empty attributed
+// title (font size is the larger of the label font size and 24), 16-point content
+// insets on every edge, and a systemGray6 base background color.
+- (instancetype)initKeyboardButton
+{
+    self = [super init];
+    if (self) {
+        UIButtonConfiguration* config = UIButtonConfiguration.grayButtonConfiguration;
+        config.attributedTitle = [[NSAttributedString alloc] initWithString:@"" attributes:@{ NSFontAttributeName : [UIFont systemFontOfSize:MAX([UIFont labelFontSize], 24.0)] }];
+        config.contentInsets = NSDirectionalEdgeInsetsMake(16, 16, 16, 16);
+        config.baseBackgroundColor = [UIColor systemGray6Color];
+        self.configuration = config;
+    }
+    return self;
+}
+
 - (void)setupStandardActionButtonColor
 {
     self.backgroundColor = ACTION_BUTTON_UICOLOR;
diff --git a/ios/voicebox/Util/VBSpeechSynthesizer.m b/ios/voicebox/Util/VBSpeechSynthesizer.m
index 4997763..74caab6 100644
--- a/ios/voicebox/Util/VBSpeechSynthesizer.m
+++ b/ios/voicebox/Util/VBSpeechSynthesizer.m
@@ -36,8 +36,18 @@ - (void)speak:(NSString*)textToSpeak
 
     // TODO -- specify voice. List all with AVSpeechSynthesisVoice.speechVoices, find
     // highest quality matching curent locale. Save result for next time.
+    // AVSpeechSynthesisVoice* voice = [AVSpeechSynthesisVoice voiceWithLanguage:@"en-US"];
+    // voice = [AVSpeechSynthesisVoice voiceWithIdentifier:@"com.apple.speech.synthesis.voice.Fred"];
+    // en-US, Name: Fred, Quality: Default [com.apple.speech.synthesis.voice.Fred]
     AVSpeechSynthesisVoice* voice = [[AVSpeechSynthesisVoice alloc] init];
-    ;
+    /*NSArray* speechVoices = [AVSpeechSynthesisVoice speechVoices];
+    NSLog(@"Voices: (%lu) %@", (unsigned long)speechVoices.count, speechVoices);
+    for (AVSpeechSynthesisVoice* candidateVoice in speechVoices) {
+        if (candidateVoice.quality > voice.quality) {
+            // Need to check gender, and region
+            voice = candidateVoice;
+        }
+    }*/
     utterance.voice = voice;
 
     // Create a speech synthesizer if not available. May be removed under memory presure so always check.
diff --git a/ios/voicebox/Whisper/VBAudioListener.m b/ios/voicebox/Whisper/VBAudioListener.m
index cb430f6..a8814af 100644
--- a/ios/voicebox/Whisper/VBAudioListener.m
+++ b/ios/voicebox/Whisper/VBAudioListener.m
@@ -271,11 +271,12 @@ - (IBAction)onTranscribe
     params.print_special = false;
     params.translate = false;
     params.language = "en";
+    // TODO P1: `params.suppress_non_speech_tokens = true;` once it makes it to stable release
     params.n_threads = max_threads;
     // TODO: think we're processing whole thing each time?
     params.offset_ms = 0;
     params.no_context = true;
-    params.single_segment = true;
+    params.single_segment = false;
 
     CFTimeInterval startTime = CACurrentMediaTime();
 
diff --git a/morse/Morse.xcodeproj/project.pbxproj b/morse/Morse.xcodeproj/project.pbxproj
index 4a7454f..4101019 100644
--- a/morse/Morse.xcodeproj/project.pbxproj
+++ b/morse/Morse.xcodeproj/project.pbxproj
@@ -407,7 +407,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				IPHONEOS_DEPLOYMENT_TARGET = 16.4;
+				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
 				MTL_FAST_MATH = YES;
 				ONLY_ACTIVE_ARCH = YES;
@@ -459,7 +459,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				IPHONEOS_DEPLOYMENT_TARGET = 16.4;
+				IPHONEOS_DEPLOYMENT_TARGET = 16.0;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				MTL_FAST_MATH = YES;
 				SDKROOT = iphoneos;
diff --git a/morse/Morse/AppDelegate.h b/morse/Morse/AppDelegate.h
index 90e99a3..32b81aa 100644
--- a/morse/Morse/AppDelegate.h
+++ b/morse/Morse/AppDelegate.h
@@ -9,6 +9,7 @@
 
 @interface AppDelegate : UIResponder
 
+@property (strong, nonatomic) UIWindow *window;
 
 @end
 
diff --git a/morse/Morse/BaseTestViewController.m b/morse/Morse/BaseTestViewController.m
index f7abbbf..7cc4165 100644
--- a/morse/Morse/BaseTestViewController.m
+++ b/morse/Morse/BaseTestViewController.m
@@ -293,7 +293,7 @@ -(void) selectNextTestButton {
 
     // highlight button
     UIButtonConfiguration* config = UIButtonConfiguration.grayButtonConfiguration;
-    config.baseBackgroundColor = [UIColor greenColor];
+    config.baseBackgroundColor = [UIColor systemBlueColor];
     _currentTestTarget.configuration = config;
 }
 