Skip to content

Commit

Permalink
added EOS for small buffer size
Browse files Browse the repository at this point in the history
  • Loading branch information
kushaljain-apra committed Feb 27, 2024
1 parent ad0977b commit 42df5de
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 0 deletions.
2 changes: 2 additions & 0 deletions base/src/AudioToTextXForm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ bool AudioToTextXForm::process(frame_container& frames)
for (int index = 0; index < numberOfSamples; index++) {
mDetail->mInputAudioBuffer.push_back((float)constFloatPointer[index]/ 32768.0f);
}

if (mDetail->mInputAudioBuffer.size() < mDetail->mProps.bufferSize) {
sendEOS();
return true;
}
whisper_full(
Expand Down
49 changes: 49 additions & 0 deletions base/test/audioToTextXform_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "FileWriterModule.h"
#include "AudioToTextXForm.h"
#include "Module.h"
#include "ExternalSinkModule.h"

#include <unordered_map>
#include <string>
Expand Down Expand Up @@ -223,5 +224,53 @@ BOOST_AUTO_TEST_CASE(change_unsupported_prop_asr)
BOOST_CHECK_THROW(asr->setProps(propschange), std::runtime_error);
}

// Checks the end-of-stream path of AudioToTextXForm: after a single step of
// the file reader and of the transform, the frame popped from the external
// sink must be an EOS frame, and the text written to the output file must have
// a cosine similarity of exactly 0 with the reference transcription.
// NOTE(review): the third AudioToTextXFormProps argument (160000) is presumably
// the buffer size that one read of the PCM file does not fill - confirm against
// the props definition.
BOOST_AUTO_TEST_CASE(checkEOS_asr)
{
    std::vector<std::string> asrOutText = { "./data/asr_out.txt" };
    Test_Utils::FileCleaner f(asrOutText);

    Logger::setLogLevel(boost::log::trivial::severity_level::info);

    // This is a PCM file without WAV header
    auto fileReaderProps = FileReaderModuleProps("./data/audioToTextXform_test.pcm");
    fileReaderProps.readLoop = false;
    auto fileReader = boost::shared_ptr<FileReaderModule>(new FileReaderModule(fileReaderProps));
    auto metadata = framemetadata_sp(new FrameMetadata(FrameMetadata::AUDIO));
    auto pinId = fileReader->addOutputPin(metadata);

    auto asr = boost::shared_ptr<AudioToTextXForm>(new AudioToTextXForm(AudioToTextXFormProps(
        AudioToTextXFormProps::DecoderSamplingStrategy::GREEDY
        ,"./data/whisper/models/ggml-tiny.en-q8_0.bin",160000)));
    fileReader->setNext(asr);

    // The transform feeds two consumers: a file writer for the text output and
    // an external sink from which this test pops frames directly.
    auto outputFile = boost::shared_ptr<FileWriterModule>(new FileWriterModule(FileWriterModuleProps(asrOutText[0], false)));
    asr->setNext(outputFile);

    auto sink = boost::shared_ptr<ExternalSinkModule>(new ExternalSinkModule());
    asr->setNext(sink);

    BOOST_TEST(fileReader->init());
    BOOST_TEST(asr->init());
    BOOST_TEST(outputFile->init());
    BOOST_TEST(sink->init());

    // One read from the file followed by one transform step.
    fileReader->step();
    asr->step();

    auto frames = sink->pop();
    // Abort the test case instead of dereferencing begin() on an empty container.
    BOOST_REQUIRE(!frames.empty());
    auto eosframe = frames.begin()->second;
    BOOST_TEST(eosframe->isEOS());

    outputFile->step();

    std::ifstream in_file_text(asrOutText[0]);
    std::ostringstream buffer;
    buffer << in_file_text.rdbuf();
    // Reference transcription of the audio; the file content is expected NOT to
    // match it at all here, hence the similarity threshold of exactly 0.
    std::string output = " The Matic speech recognition also known as ASR is the use of machine learning or artificial intelligence technology to process human speech into readable text.";
    const double thres = 0;
    BOOST_TEST(cosineSimilarity(buffer.str(), output) == thres);
    // BOOST_TEST(buffer.str() == output);
    in_file_text.close();
}

BOOST_AUTO_TEST_SUITE_END()

0 comments on commit 42df5de

Please sign in to comment.