From 649f67e2ca21289069cbcd1158f4e38deabb52c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lui=CC=81s=20Arteiro?= Date: Wed, 13 Mar 2024 02:28:16 +0000 Subject: [PATCH] chore: Minor tweaks. --- README.md | 384 +++++++++++++++------------------------ assets/js/micro.js | 15 +- assets/package-lock.json | 23 +++ assets/pnpm-lock.yaml | 23 --- 4 files changed, 178 insertions(+), 267 deletions(-) create mode 100644 assets/package-lock.json delete mode 100644 assets/pnpm-lock.yaml diff --git a/README.md b/README.md index 9b284d5..70c56e4 100644 --- a/README.md +++ b/README.md @@ -3443,46 +3443,51 @@ so this part of your code will shrink to: #### 2.2 Defining `Javascript` hook -We provide a basic user experience: +Currently, we provide a basic user experience: we let the user click on a button to start and stop the recording. -We carefully worked on the size of the image file -we used for the ML captioning model -to lower the treatment latency. +When doing image captioning, +we carefully worked on the size of the image file +used for the captioning model +to optimize the app's latency. + In the same spirit, we can downsize the original audio file -to improve the ML treatment latency. +so it's easier on the model to process it. +This will have the benefit of less overhead in our application. -The main paramters are: +The main parameters we're dealing with are: -- lower the sampling rate (the higher the more accurate), -- use mono instead of stereo, -- and the file type (WAV, MP3) +- **lower sampling rate** (the higher the more accurate is the sound), +- **use mono** instead of stereo, +- and **the file type** (WAV, MP3) -Since most microphones on PC have a single channel (mono) and sample at 48kHz, we will focus on resampling to 16kHz. -We will not make the conversion to mp3 here. -We do not treat the endianess since most chips used in computers use "little endianess". 
+Since most microphones on PC have a single channel (mono) and sample at `48kHz`, +we will focus on resampling to `16kHz`. +We will *not* make the conversion to mp3 here. -We next define the hook in a new JS file, located in the `assets/js` folder. +Next, we define the hook in a new JS file, located in the `assets/js` folder. -To resample to 16kHz, we use an [AudoiContext](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext), +To resample to `16kHz`, we use an [AudioContext](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext), and pass the desired `sampleRate`. We then use the method [decodeAudioData](https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/decodeAudioData) -which receives an [AudoiBuffer](https://developer.mozilla.org/en-US/docs/Web/API/AudioBuffer). -We get one from the Blob method [arrayBuffer()](https://developer.mozilla.org/en-US/docs/Web/API/Blob/arrayBuffer). +which receives an [AudioBuffer](https://developer.mozilla.org/en-US/docs/Web/API/AudioBuffer). +We get one from the `Blob` method [arrayBuffer()](https://developer.mozilla.org/en-US/docs/Web/API/Blob/arrayBuffer). The important part is the `Phoenix.js` function `upload`, to which we pass an identifier `"speech"`: -this sends the data as Blob via a channel to the server. +this sends the data as `Blob` via a channel to the server. We use an action button in the HTML, and attach Javascript listeners to it on the `"click"`, `"dataavailable"` and `"stop"` events. We also play with the CSS classes to modify the appearance of the action button when recording or not. -Navigate to the "asets" folder and run: +Navigate to the `assets` folder and run the following command. +We will use this to lower the sampling rate +and convert the recorded audio file. ```bash -pnpm add "audiobuffer-to-wav" +npm add "audiobuffer-to-wav" ``` Create a file called `assets/js/micro.js` @@ -5885,8 +5890,9 @@ and update it as so: <%= if @image_preview_base64 do %>
@@ -6016,14 +6022,14 @@ and update it as so: > Transcription: <%= if @audio_running? do %> - - <% else %> <%= if @transcription do %> - <%= @transcription %> + <% else %> - Waiting for audio input. - <% end %> <% end %> + <%= if @transcription do %> + <%= @transcription %> + <% else %> + Waiting for audio input. + <% end %> + <% end %>
@@ -6117,69 +6123,40 @@ Head over to `lib/app_web/live/page_live.html.heex` and change it like so:
+

- - + - 🔥 LiveView - - + - + + + - 🐝 Bumblebee - + 🐝 Bumblebee +

- @@ -6469,33 +6388,29 @@ Head over to `lib/app_web/live/page_live.html.heex` and change it like so:
Transcription: <%= if @audio_running? do %> - - <% else %> <%= if @transcription do %> - <%= @transcription %> + <% else %> - Waiting for audio input. - <% end %> <% end %> + <%= if @transcription do %> + <%= @transcription %> + <% else %> + Waiting for audio input. + <% end %> + <% end %>
-
+
-
- found_image +
+ found_image
- <%= @audio_search_result.description %> + <%= @audio_search_result.description %>
+
+
@@ -6633,6 +6548,3 @@ please star it on GitHub, so that we know! ⭐ Thank you! 🙏 -``` - -``` diff --git a/assets/js/micro.js b/assets/js/micro.js index 4b6332c..164bb28 100644 --- a/assets/js/micro.js +++ b/assets/js/micro.js @@ -30,14 +30,7 @@ export default { navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => { // Instantiate MediaRecorder mediaRecorder = new MediaRecorder(stream); - mediaRecorder.start(); - - /* - const { channelCount, sampleRate } = stream - .getAudioTracks()[0] - .getSettings(); - console.log(channelCount, sampleRate); - */ + mediaRecorder.start() // And update the elements recordButton.classList.remove(...blue); @@ -52,12 +45,14 @@ export default { // Add "stop" event handler for when the recording stops. mediaRecorder.addEventListener("stop", async () => { const audioBlob = new Blob(audioChunks); + // update the source of the Audio tag for the user to listen to his audio audioElement.src = URL.createObjectURL(audioBlob); // create an AudioContext with a sampleRate of 16000 const audioContext = new AudioContext({ sampleRate: 16000 }); + // We optimize the audio to reduce the size of the file whilst maintaining the necessary information for the model ----------- // async read the Blob as ArrayBuffer to feed the "decodeAudioData" const arrayBuffer = await audioBlob.arrayBuffer(); // decodes the ArrayBuffer into the AudioContext format @@ -66,10 +61,13 @@ export default { const wavBuffer = toWav(audioBuffer); // builds a Blob to pass to the Phoenix.JS.upload const wavBlob = new Blob([wavBuffer], { type: "audio/wav" }); + + // upload to the server via a channel with the built-in Phoenix.JS.upload _this.upload("speech", [wavBlob]); // close the MediaRecorder instance mediaRecorder.stop(); + // cleanups audioChunks = []; recordButton.classList.remove(...pulseGreen); @@ -80,3 +78,4 @@ export default { }); }, }; + diff --git a/assets/package-lock.json b/assets/package-lock.json new file mode 100644 index 0000000..a4d2a19 --- 
/dev/null +++ b/assets/package-lock.json @@ -0,0 +1,23 @@ +{ + "name": "assets", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "audiobuffer-to-wav": "^1.0.0", + "toastify-js": "^1.12.0" + } + }, + "node_modules/audiobuffer-to-wav": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/audiobuffer-to-wav/-/audiobuffer-to-wav-1.0.0.tgz", + "integrity": "sha512-CAoir4NRrAzAgYo20tEMiKZR84coE8bq/L+H2kwAaULVY4+0xySsEVtNT5raqpzmH6y0pqzY6EmoViLd9W8F/w==" + }, + "node_modules/toastify-js": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/toastify-js/-/toastify-js-1.12.0.tgz", + "integrity": "sha512-HeMHCO9yLPvP9k0apGSdPUWrUbLnxUKNFzgUoZp1PHCLploIX/4DSQ7V8H25ef+h4iO9n0he7ImfcndnN6nDrQ==" + } + } +} diff --git a/assets/pnpm-lock.yaml b/assets/pnpm-lock.yaml deleted file mode 100644 index 3f15d0d..0000000 --- a/assets/pnpm-lock.yaml +++ /dev/null @@ -1,23 +0,0 @@ -lockfileVersion: '6.0' - -settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - -dependencies: - audiobuffer-to-wav: - specifier: ^1.0.0 - version: 1.0.0 - toastify-js: - specifier: ^1.12.0 - version: 1.12.0 - -packages: - - /audiobuffer-to-wav@1.0.0: - resolution: {integrity: sha512-CAoir4NRrAzAgYo20tEMiKZR84coE8bq/L+H2kwAaULVY4+0xySsEVtNT5raqpzmH6y0pqzY6EmoViLd9W8F/w==} - dev: false - - /toastify-js@1.12.0: - resolution: {integrity: sha512-HeMHCO9yLPvP9k0apGSdPUWrUbLnxUKNFzgUoZp1PHCLploIX/4DSQ7V8H25ef+h4iO9n0he7ImfcndnN6nDrQ==} - dev: false