From 42df5debcc6359ab18f80addd2fb54408a9e05f8 Mon Sep 17 00:00:00 2001
From: Kushal Jain
Date: Tue, 27 Feb 2024 19:27:32 +0530
Subject: [PATCH] added EOS for small buffer size

---
Review notes (text between "---" and the diffstat is ignored by git am):
- AudioToTextXForm::process() now calls sendEOS() before its early return
  when mInputAudioBuffer holds fewer entries than mProps.bufferSize.
- checkEOS_asr steps the reader and the transform once, pops the frame that
  reaches ExternalSinkModule, expects it to be EOS, and expects
  cosineSimilarity() between the written file and the reference sentence
  to equal 0.
- "std:string" in the new test was corrected to "std::string"; the old form
  was a label named "std" followed by an unqualified "string".
- This copy was rebuilt from a whitespace-collapsed extract. Indentation,
  blank context lines and the template arguments are reconstructions, and
  the header names on the two bare "#include" context lines were lost.
  Verify the context against the target tree before applying.

 base/src/AudioToTextXForm.cpp        |  2 ++
 base/test/audioToTextXform_tests.cpp | 49 ++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/base/src/AudioToTextXForm.cpp b/base/src/AudioToTextXForm.cpp
index 1e91fc0bd..d84a13073 100644
--- a/base/src/AudioToTextXForm.cpp
+++ b/base/src/AudioToTextXForm.cpp
@@ -160,7 +160,9 @@ bool AudioToTextXForm::process(frame_container& frames)
 	for (int index = 0; index < numberOfSamples; index++) {
 		mDetail->mInputAudioBuffer.push_back((float)constFloatPointer[index]/ 32768.0f);
 	}
+
 	if (mDetail->mInputAudioBuffer.size() < mDetail->mProps.bufferSize) {
+		sendEOS();
 		return true;
 	}
 	whisper_full(
diff --git a/base/test/audioToTextXform_tests.cpp b/base/test/audioToTextXform_tests.cpp
index 6875d7fa1..b566f5bb2 100644
--- a/base/test/audioToTextXform_tests.cpp
+++ b/base/test/audioToTextXform_tests.cpp
@@ -14,6 +14,7 @@
 #include "FileWriterModule.h"
 #include "AudioToTextXForm.h"
 #include "Module.h"
+#include "ExternalSinkModule.h"
 #include
 #include
 
@@ -223,5 +224,53 @@ BOOST_AUTO_TEST_CASE(change_unsupported_prop_asr)
 	BOOST_CHECK_THROW(asr->setProps(propschange), std::runtime_error);
 }
 
+BOOST_AUTO_TEST_CASE(checkEOS_asr)
+{
+	std::vector<std::string> asrOutText = { "./data/asr_out.txt" };
+	Test_Utils::FileCleaner f(asrOutText);
+
+	Logger::setLogLevel(boost::log::trivial::severity_level::info);
+
+	// This is a PCM file without WAV header
+	auto fileReaderProps = FileReaderModuleProps("./data/audioToTextXform_test.pcm");
+	fileReaderProps.readLoop = false;
+	auto fileReader = boost::shared_ptr<FileReaderModule>(new FileReaderModule(fileReaderProps));
+	auto metadata = framemetadata_sp(new FrameMetadata(FrameMetadata::AUDIO));
+	auto pinId = fileReader->addOutputPin(metadata);
+
+	auto asr = boost::shared_ptr<AudioToTextXForm>(new AudioToTextXForm(AudioToTextXFormProps(
+		AudioToTextXFormProps::DecoderSamplingStrategy::GREEDY
+		,"./data/whisper/models/ggml-tiny.en-q8_0.bin",160000)));
+	fileReader->setNext(asr);
+
+	auto outputFile = boost::shared_ptr<FileWriterModule>(new FileWriterModule(FileWriterModuleProps(asrOutText[0], false)));
+	asr->setNext(outputFile);
+
+	auto sink = boost::shared_ptr<ExternalSinkModule>(new ExternalSinkModule());
+	asr->setNext(sink);
+
+	BOOST_TEST(fileReader->init());
+	BOOST_TEST(asr->init());
+	BOOST_TEST(outputFile->init());
+	BOOST_TEST(sink->init());
+
+	fileReader->step();
+	asr->step();
+
+	auto frames = sink->pop();
+	auto eosframe = frames.begin()->second;
+	BOOST_TEST(eosframe->isEOS());
+
+	outputFile->step();
+
+	std::ifstream in_file_text(asrOutText[0]);
+	std::ostringstream buffer;
+	buffer << in_file_text.rdbuf();
+	std::string output = " The Matic speech recognition also known as ASR is the use of machine learning or artificial intelligence technology to process human speech into readable text.";
+	double thres = 0;
+	BOOST_TEST(cosineSimilarity(buffer.str(), output) == thres);
+	// BOOST_TEST(buffer.str() == output);
+	in_file_text.close();
+}
 
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file