From 95164297ff134b170b55cca816a8cc847719233f Mon Sep 17 00:00:00 2001
From: Dragoș Bălan <33976463+greenw0lf@users.noreply.github.com>
Date: Mon, 23 Dec 2024 16:23:40 +0100
Subject: [PATCH] Add duration of audio and VAD removed duration to
 BatchedInferencePipeline (#1186)

Co-authored-by: MahmoudAshraf97
---
 faster_whisper/transcribe.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index 42272caf..d270bdf4 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -388,6 +388,10 @@ def transcribe(
             audio = decode_audio(audio, sampling_rate=sampling_rate)
 
         duration = audio.shape[0] / sampling_rate
+        self.model.logger.info(
+            "Processing audio with duration %s", format_timestamp(duration)
+        )
+
         chunk_length = chunk_length or self.model.feature_extractor.chunk_length
         # if no segment split is provided, use vad_model and generate segments
         if not clip_timestamps:
@@ -421,6 +425,11 @@ def transcribe(
             / sampling_rate
         )
 
+        self.model.logger.info(
+            "VAD filter removed %s of audio",
+            format_timestamp(duration - duration_after_vad),
+        )
+
         audio_chunks, chunks_metadata = collect_chunks(audio, clip_timestamps)
         features = (
             [self.model.feature_extractor(chunk)[..., :-1] for chunk in audio_chunks]
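
Below is a minimal usage sketch (not part of the patch) showing how the two new INFO messages surface when running BatchedInferencePipeline with the "faster_whisper" logger enabled. The "tiny" model size, the "audio.wav" path, and the example log values in the comments are placeholder assumptions, not taken from the patch.

    # Sketch: enable INFO logging so the messages added by this patch are visible.
    import logging

    from faster_whisper import BatchedInferencePipeline, WhisperModel

    # The patch emits its messages through the model's logger, which is the
    # library's "faster_whisper" logger; raise its level to INFO to see them.
    logging.basicConfig()
    logging.getLogger("faster_whisper").setLevel(logging.INFO)

    model = WhisperModel("tiny")  # placeholder model size
    pipeline = BatchedInferencePipeline(model=model)

    # Expected log lines (exact values depend on the input file), e.g.:
    #   Processing audio with duration 01:23.456
    #   VAD filter removed 00:07.890 of audio
    segments, info = pipeline.transcribe("audio.wav", batch_size=8)
    for segment in segments:
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))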