From 42df5debcc6359ab18f80addd2fb54408a9e05f8 Mon Sep 17 00:00:00 2001
From: Kushal Jain <kushalj@apra.in>
Date: Tue, 27 Feb 2024 19:27:32 +0530
Subject: [PATCH] added EOS for small buffer size

---
 base/src/AudioToTextXForm.cpp        |  2 ++
 base/test/audioToTextXform_tests.cpp | 49 ++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)
diff --git a/base/src/AudioToTextXForm.cpp b/base/src/AudioToTextXForm.cpp
index 1e91fc0bd..d84a13073 100644
--- a/base/src/AudioToTextXForm.cpp
+++ b/base/src/AudioToTextXForm.cpp
@@ -160,7 +160,9 @@ bool AudioToTextXForm::process(frame_container& frames)
 	for (int index = 0; index < numberOfSamples; index++) {
 		mDetail->mInputAudioBuffer.push_back((float)constFloatPointer[index]/ 32768.0f);
 	}
+
 	if (mDetail->mInputAudioBuffer.size() < mDetail->mProps.bufferSize) {
+		sendEOS();
 		return true;
 	}
 	whisper_full(
diff --git a/base/test/audioToTextXform_tests.cpp b/base/test/audioToTextXform_tests.cpp
index 6875d7fa1..b566f5bb2 100644
--- a/base/test/audioToTextXform_tests.cpp
+++ b/base/test/audioToTextXform_tests.cpp
@@ -14,6 +14,7 @@
 #include "FileWriterModule.h"
 #include "AudioToTextXForm.h"
 #include "Module.h"
+#include "ExternalSinkModule.h"
 
 #include <unordered_map>
 #include <string>
@@ -223,5 +224,53 @@ BOOST_AUTO_TEST_CASE(change_unsupported_prop_asr)
     BOOST_CHECK_THROW(asr->setProps(propschange), std::runtime_error);
 }
 
+BOOST_AUTO_TEST_CASE(checkEOS_asr)
+{
+    // Feeds one chunk of audio through AudioToTextXForm and checks that the frame reaching the sink is an EOS frame.
+    std::vector<std::string> asrOutText = { "./data/asr_out.txt" };
+    Test_Utils::FileCleaner f(asrOutText);
+    Logger::setLogLevel(boost::log::trivial::severity_level::info);
+
+    // This is a PCM file without WAV header
+    auto fileReaderProps = FileReaderModuleProps("./data/audioToTextXform_test.pcm");
+    fileReaderProps.readLoop = false;
+    auto fileReader = boost::shared_ptr<FileReaderModule>(new FileReaderModule(fileReaderProps));
+    auto metadata = framemetadata_sp(new FrameMetadata(FrameMetadata::AUDIO));
+    auto pinId = fileReader->addOutputPin(metadata);
+    // NOTE(review): 160000 is presumably the bufferSize, picked to exceed the samples in the test clip - confirm.
+    auto asr = boost::shared_ptr<AudioToTextXForm>(new AudioToTextXForm(AudioToTextXFormProps(
+        AudioToTextXFormProps::DecoderSamplingStrategy::GREEDY
+        ,"./data/whisper/models/ggml-tiny.en-q8_0.bin",160000)));
+    fileReader->setNext(asr);
+
+    auto outputFile = boost::shared_ptr<FileWriterModule>(new FileWriterModule(FileWriterModuleProps(asrOutText[0], false)));
+    asr->setNext(outputFile);
+    // Second consumer of asr, so the test can pop and inspect the frames asr emits.
+    auto sink = boost::shared_ptr<ExternalSinkModule>(new ExternalSinkModule());
+    asr->setNext(sink);
+
+    BOOST_TEST(fileReader->init());
+    BOOST_TEST(asr->init());
+    BOOST_TEST(outputFile->init());
+    BOOST_TEST(sink->init());
+
+    fileReader->step();
+    asr->step();
+    auto frames = sink->pop();
+    // Abort the test here instead of dereferencing begin() of an empty container.
+    BOOST_REQUIRE(!frames.empty());
+    auto eosframe = frames.begin()->second;
+    BOOST_TEST(eosframe->isEOS());
+
+    outputFile->step();
+    // thres is 0: presumably nothing was transcribed, so the output must not resemble the reference text - confirm.
+    std::ifstream in_file_text(asrOutText[0]);
+    std::ostringstream buffer;
+    buffer << in_file_text.rdbuf();
+    std::string output = " The Matic speech recognition also known as ASR is the use of machine learning or artificial intelligence technology to process human speech into readable text.";
+    double thres = 0;
+    BOOST_TEST(cosineSimilarity(buffer.str(), output) == thres);
+    in_file_text.close();
+}
 
 BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file