From 3aee8a458a0ad81f4a6243094383bc9968bd103a Mon Sep 17 00:00:00 2001
From: Fedir Zadniprovskyi
Date: Tue, 16 Jul 2024 07:20:06 -0700
Subject: [PATCH] docs: add js example

Addresses #26
---
 examples/javascript/index.js | 238 +++++++++++++++++++++++++++++++++++
 1 file changed, 238 insertions(+)
 create mode 100644 examples/javascript/index.js

diff --git a/examples/javascript/index.js b/examples/javascript/index.js
new file mode 100644
index 00000000..679f4f0d
--- /dev/null
+++ b/examples/javascript/index.js
@@ -0,0 +1,238 @@
+/**
+ * Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26
+ */
+import fs from 'fs';
+import WebSocket from 'ws';
+import fetch from 'node-fetch';
+import FormData from 'form-data';
+import path from 'path';
+import ffmpeg from 'fluent-ffmpeg';
+import dotenv from 'dotenv';
+
+dotenv.config();
+
+const ffmpegPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
+ffmpeg.setFfmpegPath(ffmpegPath);
+
+/**
+ * Read a required environment variable.
+ *
+ * @param {string} name - Name of the environment variable
+ * @returns {string} - The variable's value
+ * @throws {Error} If the variable is unset or empty
+ */
+function requireEnv(name) {
+  const value = process.env[name];
+  if (!value) {
+    throw new Error(`Environment variable ${name} is not set`);
+  }
+  return value;
+}
+
+/**
+ * POST a multipart form to an endpoint and parse the JSON response.
+ *
+ * @param {string} url - Endpoint URL
+ * @param {FormData} formData - Multipart body to send
+ * @returns {Promise<object>} - Parsed JSON response body
+ * @throws {Error} If the server responds with a non-2xx status
+ */
+async function postForm(url, formData) {
+  const response = await fetch(url, {
+    method: 'POST',
+    body: formData,
+  });
+
+  if (!response.ok) {
+    // Error bodies are not guaranteed to be JSON, so surface them as text.
+    const body = await response.text();
+    throw new Error(`Request to ${url} failed with status ${response.status}: ${body}`);
+  }
+
+  return response.json();
+}
+
+/**
+ * Transcribe an audio file using the HTTP endpoint.
+ * Supported file types include wav, mp3, webm, and other types supported by the OpenAI API.
+ * I have tested with these three types.
+ *
+ * The server address is read from the TRANSCRIPTION_API_BASE_URL environment variable.
+ *
+ * @param {string} filePath - Path to the audio file
+ * @param {string} model - Model name
+ * @param {string} language - Language code
+ * @param {string} responseFormat - Response format
+ * @param {string} temperature - Temperature setting
+ * @returns {Promise<object>} - Parsed transcription response (also logged)
+ */
+async function transcribeFile(filePath, model, language, responseFormat, temperature) {
+  const baseUrl = requireEnv('TRANSCRIPTION_API_BASE_URL');
+
+  const formData = new FormData();
+  formData.append('file', fs.createReadStream(filePath));
+  formData.append('model', model);
+  formData.append('language', language);
+  formData.append('response_format', responseFormat);
+  formData.append('temperature', temperature);
+
+  const transcription = await postForm(`${baseUrl}/v1/audio/transcriptions`, formData);
+  console.log('Transcription Response:', transcription);
+  return transcription;
+}
+
+/**
+ * Translate an audio file using the HTTP endpoint.
+ * Only English is supported for translation.
+ * Currently, I am using GLM-4-9b-int8 to translate various voices.
+ * I am not sure if the author can add an endpoint for custom API+Key translation.
+ * I plan to package my frontend, fast-whisper-server, and vllm+glm-4-9b-int8 into one Docker container for unified deployment.
+ *
+ * The server address is read from the TRANSLATION_API_BASE_URL environment variable.
+ *
+ * @param {string} filePath - Path to the audio file
+ * @param {string} model - Model name
+ * @param {string} responseFormat - Response format
+ * @param {string} temperature - Temperature setting
+ * @returns {Promise<object>} - Parsed translation response (also logged)
+ */
+async function translateFile(filePath, model, responseFormat, temperature) {
+  const baseUrl = requireEnv('TRANSLATION_API_BASE_URL');
+
+  const formData = new FormData();
+  formData.append('file', fs.createReadStream(filePath));
+  formData.append('model', model);
+  formData.append('response_format', responseFormat);
+  formData.append('temperature', temperature);
+
+  const translation = await postForm(`${baseUrl}/v1/audio/translations`, formData);
+  console.log('Translation Response:', translation);
+  return translation;
+}
+
+/**
+ * Send audio data over WebSocket for transcription.
+ * Currently, the supported file type for transcription is PCM.
+ * I am not sure if other types are supported.
+ *
+ * The server address is read from the TRANSCRIPTION_WS_BASE_URL environment variable
+ * and defaults to ws://localhost:8000.
+ *
+ * @param {string} filePath - Path to the audio file
+ * @param {string} model - Model name
+ * @param {string} language - Language code
+ * @param {string} responseFormat - Response format
+ * @param {string} temperature - Temperature setting
+ * @returns {Promise<void>} - Resolves once the WebSocket connection has closed
+ */
+async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
+  const baseUrl = process.env.TRANSCRIPTION_WS_BASE_URL || 'ws://localhost:8000';
+  const wsUrl = new URL(`${baseUrl}/v1/audio/transcriptions`);
+  wsUrl.searchParams.set('model', model);
+  wsUrl.searchParams.set('language', language);
+  wsUrl.searchParams.set('response_format', responseFormat);
+  wsUrl.searchParams.set('temperature', temperature);
+
+  // Read the audio before connecting so that a missing file fails fast.
+  const audioBuffer = await fs.promises.readFile(filePath);
+
+  await new Promise((resolve, reject) => {
+    const ws = new WebSocket(wsUrl);
+
+    ws.on('open', () => {
+      ws.send(audioBuffer);
+    });
+
+    ws.on('message', (message) => {
+      // The payload may arrive as a Buffer, so decode it before parsing.
+      const text = message.toString();
+      try {
+        console.log('WebSocket Response:', JSON.parse(text));
+      } catch {
+        console.error('Received a non-JSON WebSocket message:', text);
+      }
+    });
+
+    ws.on('close', () => {
+      console.log('WebSocket connection closed');
+      resolve();
+    });
+
+    ws.on('error', (error) => {
+      console.error('WebSocket error:', error);
+      reject(error);
+    });
+  });
+}
+
+/**
+ * Convert audio file to PCM format (mono, 16 kHz, signed 16-bit little-endian).
+ *
+ * The output is written next to the input with a `.pcm` extension. An input that
+ * already has that path is returned unchanged, since ffmpeg cannot read from and
+ * write to the same file.
+ *
+ * @param {string} filePath - Path to the audio file
+ * @returns {Promise<string>} - Path to the converted PCM file
+ */
+async function convertToPcm(filePath) {
+  // Swap only the trailing extension: String.replace() would rewrite the first
+  // match anywhere in the path and prepend '.pcm' when there is no extension.
+  const { root, dir, name } = path.parse(filePath);
+  const pcmFilePath = path.format({ root, dir, name, ext: '.pcm' });
+
+  if (path.resolve(pcmFilePath) === path.resolve(filePath)) {
+    return filePath;
+  }
+
+  await new Promise((resolve, reject) => {
+    ffmpeg(filePath)
+      .audioChannels(1)
+      .audioFrequency(16000)
+      .audioCodec('pcm_s16le')
+      .toFormat('s16le')
+      .on('end', () => {
+        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
+        resolve();
+      })
+      .on('error', (error) => {
+        console.error(`Error converting audio to PCM: ${error.message}`);
+        reject(error);
+      })
+      .save(pcmFilePath);
+  });
+
+  return pcmFilePath;
+}
+
+/**
+ * Run the example: convert the input to PCM, then call the HTTP transcription,
+ * HTTP translation, and WebSocket transcription endpoints in turn.
+ */
+async function main() {
+  const model = 'Systran/faster-whisper-large-v3';
+  const language = 'en';
+  const responseFormat = 'json';
+  const temperature = '0';
+  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path
+
+  // Convert the audio file to PCM format
+  const pcmFilePath = await convertToPcm(filePath);
+
+  // Transcribe the audio file using the HTTP endpoint
+  await transcribeFile(pcmFilePath, model, language, responseFormat, temperature);
+
+  // Translate the audio file using the HTTP endpoint
+  await translateFile(pcmFilePath, model, responseFormat, temperature);
+
+  // Transcribe the audio file using the WebSocket endpoint
+  await sendAudioOverWebSocket(pcmFilePath, model, language, responseFormat, temperature);
+}
+
+// Make sure to use ffmpeg version 7 or above. The default apt-get install only installs version 4.x. Also, Ubuntu 22.04 or above is required to support version 7.x.
+main().catch((error) => {
+  console.error(error);
+  process.exitCode = 1;
+});
+
+// Project URL: https://github.com/Gan-Xing/whisper