Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Delete download_worker if not nullptr in ModelDownloader destr… #10

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
none_no_input="No input"
Phonetic_Transcription="Phonetic Transcription"
phonetic_transcription_help="If enabled, the output will be transformed phonetically, e.g. 'ABC' will become 'aei bee see'."
File="File"
Text="Text"
Generate_Audio="Generate Audio"
Speaker_ID="Speaker ID"
Model="Model"
Delete_Cached_Models="Delete Cached Models"
Delete_Cached_Models="⚠️ Delete Cached Models ⚠️"
Speed="Speed"
Line_By_Line="Read Line By Line"
line_by_line_help="If enabled, the input text or file will be read line by line, otherwise, the entire input text or file will be read at once."
input_debounce_help="Enable waiting for input changes to end before the input text is processed. This is useful when typing or rapid changes appear in the input text or file."
input_debounce="Input Debounce"
Latency="Latency"
latency_help="The time in milliseconds to wait before emitting another audio batch."
Advanced="Advanced Settings"
Interrupt_Mode="Interrupt Mode"
interrupt_mode_help="If enabled, the audio generation will be interrupted when new generated audio comes in."
23 changes: 16 additions & 7 deletions src/audio-thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ void AudioThread::run()
emitFromBuffer();
}

// Perform the operation to be timed
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
// Calculate the time taken to process the audio samples
const auto end = std::chrono::high_resolution_clock::now();
const auto duration =
std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
const auto sleep_duration_ms =
std::chrono::milliseconds(TARGET_BATCH_SIZE_MS) - duration;

// Sleep for [TARGET_BATCH_SIZE_MS] minus the time taken to process the audio samples
std::this_thread::sleep_for(std::chrono::milliseconds(TARGET_BATCH_SIZE_MS) -
duration);
std::this_thread::sleep_for(sleep_duration_ms);
}
}

Expand All @@ -39,16 +41,23 @@ void AudioThread::emitFromBuffer()
// Lock the mutex
std::lock_guard<std::mutex> lock(mutex);

// Get 20ms audio samples from the buffer
const int target_number_of_samples = TARGET_BATCH_SIZE_MS * sample_rate / 1000;

// Get audio samples from the buffer
std::vector<float> samples;
for (int i = 0; i < TARGET_BATCH_SIZE_MS * sample_rate / 1000; i++) {
for (int i = 0; i < target_number_of_samples; i++) {
if (this->buffer.empty()) {
break;
}
samples.push_back(this->buffer.front());
this->buffer.pop_front();
}

// if needed - pad the samples with silence to reach the target batch size
while ((int)samples.size() < target_number_of_samples) {
samples.push_back(0.0f);
}

// Emit audio samples
emitAudioSamples(samples, sample_rate);
}
Expand Down
15 changes: 14 additions & 1 deletion src/audio-thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,34 @@ class AudioThread {
// Lock the mutex
std::lock_guard<std::mutex> lock(mutex);

if (interrupt_mode) {
// Clear the buffer if in interrupt mode
buffer.clear();
}

// Push audio samples to the buffer
for (auto sample : samples) {
buffer.push_back(sample);
}
}

// Store the sample rate used by this audio thread. emitFromBuffer() reads it
// to size each batch and passes it along when emitting samples.
// NOTE(review): the value is written without taking `mutex` - confirm callers
// cannot race with the audio thread.
void setSampleRate(int sample_rate_)
{
	sample_rate = sample_rate_;
}
// Update the batch length, in milliseconds, used by the audio thread: run()
// subtracts the processing time from it to get the sleep duration, and
// emitFromBuffer() derives the number of samples per batch from it.
// NOTE(review): no range check and no locking here - presumably the UI slider
// keeps the value positive; verify against callers.
void setTargetBatchSizeMs(int target_batch_size_ms) { TARGET_BATCH_SIZE_MS = target_batch_size_ms; }
// Enable or disable interrupt mode. While enabled, pushing new samples first
// clears whatever is still queued in the buffer.
void setInterruptMode(bool interrupt_mode_)
{
	interrupt_mode = interrupt_mode_;
}

private:
const int TARGET_BATCH_SIZE_MS = 50;
int TARGET_BATCH_SIZE_MS = 50;

std::deque<float> buffer;
std::mutex mutex;
std::thread thread;
obs_source_t *context;
int sample_rate = 22050;
std::atomic<bool> running = false;
bool interrupt_mode = false;

void run();
void emitFromBuffer();
Expand Down
7 changes: 6 additions & 1 deletion src/model-utils/model-downloader-ui.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ void ModelDownloader::close()
{
this->mPrepareToClose = true;

// Stop the thread
this->download_thread->quit();

QDialog::close();
}

Expand Down Expand Up @@ -231,7 +234,9 @@ ModelDownloader::~ModelDownloader()
}
delete this->download_thread;
}
delete this->download_worker;
if (this->download_worker != nullptr) {
delete this->download_worker;
}
}

ModelDownloadWorker::~ModelDownloadWorker()
Expand Down
41 changes: 35 additions & 6 deletions src/squawk-source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ void squawk_source_defaults(obs_data_t *settings)
obs_data_set_default_bool(settings, "line_by_line", false);
obs_data_set_default_bool(settings, "phonetic_transcription", true);
obs_data_set_default_bool(settings, "input_debounce", true);
obs_data_set_default_bool(settings, "interrupt_mode", false);
obs_data_set_default_int(settings, "latency", 50);
}

bool add_sources_to_list(void *list_property, obs_source_t *source)
Expand Down Expand Up @@ -200,15 +202,42 @@ obs_properties_t *squawk_source_properties(void *data)
original_text.c_str(), text.c_str());
}

generate_audio_from_text(squawk_data_->tts_context, text, speaker_id,
squawk_data_->speed);
std::thread audio_gen_thread([squawk_data_, text, speaker_id]() {
generate_audio_from_text(squawk_data_->tts_context, text,
speaker_id, squawk_data_->speed);
});
audio_gen_thread.detach();

return true;
});

// add advanced settings group
obs_properties_t *advanced_group = obs_properties_create();
obs_properties_add_group(ppts, "advanced", MT_("Advanced"), OBS_GROUP_NORMAL,
advanced_group);

// add boolean property for enabling phonetic transcription
obs_properties_add_bool(advanced_group, "phonetic_transcription",
MT_("Phonetic_Transcription"));
// add info description for phonetic transcription
obs_property_set_long_description(obs_properties_get(advanced_group,
"phonetic_transcription"),
MT_("phonetic_transcription_help"));

// add boolean property for enabling interrupt mode
obs_properties_add_bool(advanced_group, "interrupt_mode", MT_("Interrupt_Mode"));
// add info description for interrupt mode
obs_property_set_long_description(obs_properties_get(advanced_group, "interrupt_mode"),
MT_("interrupt_mode_help"));

// add int slider for setting the latency
obs_properties_add_int_slider(advanced_group, "latency", MT_("Latency"), 10, 250, 10);
obs_property_set_long_description(obs_properties_get(advanced_group, "latency"),
MT_("latency_help"));

// add button for deleting all cached models
obs_properties_add_button(
ppts, "delete_models", MT_("Delete_Cached_Models"),
advanced_group, "delete_models", MT_("Delete_Cached_Models"),
[](obs_properties_t *props, obs_property_t *property, void *data_) {
UNUSED_PARAMETER(props);
UNUSED_PARAMETER(property);
Expand All @@ -225,9 +254,6 @@ obs_properties_t *squawk_source_properties(void *data)
return true;
});

// add boolean property for enabling phonetic transcription
obs_properties_add_bool(ppts, "phonetic_transcription", MT_("Phonetic_Transcription"));

// add plugin info
char small_info[256];
snprintf(small_info, sizeof(small_info), PLUGIN_INFO_TEMPLATE, PLUGIN_VERSION);
Expand Down Expand Up @@ -268,6 +294,9 @@ void squawk_source_update(void *data, obs_data_t *settings)
init_sherpa_tts_context(squawk_data->tts_context, audio_samples_callback,
squawk_data);
}

squawk_data->audioThread->setTargetBatchSizeMs((int)obs_data_get_int(settings, "latency"));
squawk_data->audioThread->setInterruptMode(obs_data_get_bool(settings, "interrupt_mode"));
}

void squawk_source_activate(void *data)
Expand Down