Merge pull request #9 from neonwatty/local-youtube-merge
Local youtube merge
neonwatty authored Jan 24, 2025
2 parents 1d44997 + 7ce896e commit c418793
Showing 9 changed files with 241 additions and 372 deletions.
31 changes: 14 additions & 17 deletions Dockerfile
@@ -1,26 +1,23 @@
FROM ubuntu:22.04
ARG DEBIAN_FRONTEND=noninteractive
FROM python:3.10-slim

RUN apt-get update && \
apt-get install -y \
python3 \
python3-pip \
python3-dev \
# curl is required by the HEALTHCHECK below (python:3.10-slim does not ship it)
RUN apt-get update && apt-get install -y \
build-essential \
software-properties-common \
ffmpeg \
curl \
&& rm -rf /var/lib/apt/lists/*

RUN python3 --version && pip3 --version
RUN ffmpeg -version

ENV PYTHONPATH=.
ENV GRADIO_SERVER_PORT=8501
ENV GRADIO_SERVER_NAME="0.0.0.0"

WORKDIR /home

COPY requirements.gradio .
ENV PYTHONPATH=.

RUN pip3 install --no-cache-dir -r requirements.gradio
COPY requirements.txt /home/requirements.txt
COPY .streamlit /home/.streamlit
RUN pip3 install --no-cache-dir -r /home/requirements.txt
COPY bleep_that_sht /home/bleep_that_sht
COPY data /home/data

EXPOSE 8501

HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1

ENTRYPOINT ["python3", "/home/bleep_that_sht/gradio_app_url_download.py"]
ENTRYPOINT ["streamlit", "run", "/home/bleep_that_sht/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
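As a quick sanity check of the new Dockerfile, the image can also be built and run directly without compose; the image tag `bleep-that-sht` below is illustrative, not from the repo:

```shell
# Build the image from the repo root (tag name is an assumption).
docker build -t bleep-that-sht .

# Run it, publishing the Streamlit port declared by EXPOSE 8501.
docker run --rm -p 8501:8501 bleep-that-sht

# The app should then be reachable at http://localhost:8501/
```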
57 changes: 33 additions & 24 deletions README.md
@@ -14,9 +14,8 @@ All processing is performed locally.

- [Examples](#examples)
- [Installation](#installation)
- [For local videos](#for-local-videos)
- [For Youtube videos](#for-youtube-videos)
- [Walkthrough](#walkthrough)
- [App walkthrough](#app-walkthrough)
- [Tech walkthrough](#tech-walkthrough)

## Examples

@@ -34,42 +33,52 @@ https://github.com/neonwatty/bleep_that_sht/assets/16326421/63ebd7a0-46f6-4efd-8

## Installation

### For local videos
### Using docker

Use docker to quickly spin up the app in an isolated container by typing the following at your terminal:

```bash
docker compose up
```

Then navigate to `http://localhost:8501/` to use the app.

### Using python

To get set up to run the notebook / bleep your own videos / run the streamlit demo, first install the requirements for this project by pasting the below in your terminal:

```python
pip install -r requirements.streamlit
```bash
pip install -r requirements.txt
```

To install requirements for the gradio demo use this install
Then activate the app server by typing the following at your terminal

```python
pip install -r requirements.gradio
```bash
streamlit run /home/bleep_that_sht/app.py --server.port=8501 --server.address=0.0.0.0
```

You will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.
Then navigate to `http://localhost:8501/` to use the app in any browser.

### For Youtube videos
**Note:** you will need [ffmpeg](https://www.ffmpeg.org/download.html) installed on your machine as well.

Start this streamlit demo locally that lets you enter in a youtube / shorts url to a video you wish to bleep
## App walkthrough

```python
python -m streamlit run bleep_that_sht/app_url_download.py
```
Once you have the app up and running and have navigated to `http://localhost:8501/`, there are three tabs you can choose from.

Alternatively you can start a gradio server with the same functionality
**The first tab** allows for local video upload and processing.

```python
python -m bleep_that_sht/gradio_app_url_download.py
```
**The second tab** allows for YouTube url download and processing.

You can also use docker compose
**The third tab** has handy "about" information for convenience.

```bash
docker compose up
```
The app may take longer than usual during the initial processing of local videos or YouTube content because it needs to download the transcription model.

A quick walkthrough of both local video and youtube processing is shown below.

<p align="center">
<img align="center" src="https://github.com/jermwatt/readme_gifs/blob/main/bleep-that-sht-full.webp" height="350">
</p>

## Walkthrough
## Tech walkthrough

See `beep_that_sht_walkthrough.ipynb` to play with / see the nitty-gritty details.
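The walkthrough's "choose your bleep keywords" step boils down to parsing a comma-separated string into a clean word list. A minimal sketch of that parsing (the function name is illustrative, not from the repo):

```python
# Minimal sketch of the comma-separated bleep-word parsing the app performs:
# split on commas, strip whitespace, and drop empty entries.
def parse_bleep_words(raw: str) -> list[str]:
    return [w.strip() for w in raw.split(",") if w.strip()]

print(parse_bleep_words("treetz, ice, cream, , chocolate"))
# → ['treetz', 'ice', 'cream', 'chocolate']
```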
109 changes: 103 additions & 6 deletions bleep_that_sht/streamlit_app_url_download.py → bleep_that_sht/app.py
@@ -1,5 +1,6 @@
import streamlit as st
from bleep_that_sht.transcribe import avaliable_models
from bleep_that_sht import main_dir
from bleep_that_sht.transcribe import transcribe
from bleep_that_sht.audio_extractor import extract_audio
from bleep_that_sht.create import bleep_replace
@@ -11,23 +12,118 @@
st.set_page_config(page_title="🎬 Bleep That Sh*t 🙊")
st.title("🎬 Bleep That Sh*t 🙊")

tab1, tab2 = st.tabs(["🎬 Bleep That Sh*t 🙊", "💡 About"])
tab1, tab2, tab3 = st.tabs(["🎬 Bleep That Sh*t 🙊 (local video)", "🎬 Bleep That Sh*t 🙊 (YouTube url)", "💡 About"])

with tab2:
with tab3:
st.markdown(
"### Bleep out words of your choice from an input video. \n"
"How it works: \n\n"
"1. Provided a youtube / shorts url \n"
"1. Provide a youtube / shorts url or upload your own video \n"
"2. Choose your desired bleep keywords \n"
"3. (if running locally) Choose a model from the Whisper family to transcribe the audio (defaults to base only for HF space) \n"
"3. Choose a model from the Whisper family to transcribe the audio - the larger the model the more accurate the results but the more compute power is required. Typically the smaller `base` model works well for this application. \n"
"4. (optional) Press 'Just Transcribe' to examine / download just the transcription of the video (can help in choosing bleep words) \n"
"5. Press 'Transcribe and bleep' to transcribe and replace all instances of your keywords with *beep* sounds \n\n"
"If you want to select your Whisper model / run longer videos pull and run the app locally. \n\n"
"Notice: baseline (not fine tuned) Whisper models are used here - you may need to be creative to bleep out all the versions of an input word you want depending on its transcription. \n\n"
"You do *not* need a GPU to run this locally. Larger models take more time to process locally, but it's doable. \n"
)


with tab1:
with st.container(border=True):
uploaded_file = st.file_uploader("Choose a video...", type=["mp4"])

with st.container(border=True):
col1, col2, col3 = st.columns([8, 3, 4])
with col1:
bleep_words = st.text_area(
label="bleep-word list",
placeholder="bleep keywords go here separated by commas",
value="treetz, ice, cream, chocolate, syrup, cookie, hooked, threats, treats",
)
with col2:
model_selection = st.selectbox(
label="whisper model",
placeholder="choose whisper model",
index=1,
options=avaliable_models,
key="model-selection-local"
)
with col3:
col4 = st.empty()
with col4:
st.write("")
st.write("")
col5 = st.container()
with col5:
trans_button_val = st.button(label="just transcribe", type="secondary", key="just-transcribe-local")
col6 = st.container()
with col6:
bleep_button_val = st.button(label="transcribe & bleep", type="primary", key="transcribe-bleep-local")

a, col0, b = st.columns([1, 20, 1])
colo1, colo2 = st.columns([3, 3])

def button_logic(temporary_video_location: str, model_selection: str, bleep_word_list: list):
temporary_audio_location = temporary_video_location.replace("mp4", "mp3")
bleep_video_output = temporary_video_location.replace("original", "bleep")
bleep_audio_output = bleep_video_output.replace("mp4", "mp3")

if trans_button_val:
extract_audio(temporary_video_location, temporary_audio_location)
transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)
with col0.container(border=True):
st.text_area(
value=transcript.strip(),
placeholder="transcribe text will be shown here",
label="transcribe text",
)

if bleep_button_val:
extract_audio(temporary_video_location, temporary_audio_location)
transcript, timestamped_transcript = transcribe(local_file_path=temporary_audio_location, model=model_selection)

with col0.container(border=True):
st.text_area(
value=transcript.strip(),
placeholder="transcribe text will be shown here",
label="transcribe text",
)

bleep_replace(
temporary_video_location,
temporary_audio_location,
bleep_video_output,
bleep_audio_output,
bleep_word_list,
timestamped_transcript,
)

with colo2:
st.caption("bleeped video")
st.video(bleep_video_output)

default_file = main_dir + "/data/input/bleep_test_1.mp4"
if uploaded_file is not None:
byte_file = io.BytesIO(uploaded_file.read())
else:
filename = open(default_file, "rb")
byte_file = io.BytesIO(filename.read())

with tempfile.TemporaryDirectory() as tmpdirname:
temporary_video_location = tmpdirname + "/" + "original.mp4"
with open(temporary_video_location, "wb") as out:
out.write(byte_file.read())
with st.container(border=True):
with colo1:
st.caption("original video")
st.video(temporary_video_location)
bleep_word_list = bleep_words.split(",")
bleep_words_list = [v.strip() for v in bleep_word_list if len(v.strip()) > 0]
button_logic(temporary_video_location, model_selection, bleep_words_list)
out.close()

with tab2:
with st.container(border=True):
upload_url = st.text_input(
label="youtube/shorts url",
@@ -47,6 +143,7 @@
label="whisper model (base only in HF space)",
index=1,
options=avaliable_models,
key="model-selection-yt"
)
with col3:
col4 = st.empty()
@@ -55,10 +152,10 @@
st.write("")
col5 = st.container()
with col5:
trans_button_val = st.button(label="just transcribe", type="secondary")
trans_button_val = st.button(label="just transcribe", type="secondary", key="just-transcribe-yt")
col6 = st.container()
with col6:
bleep_button_val = st.button(label="transcribe & bleep", type="primary")
bleep_button_val = st.button(label="transcribe & bleep", type="primary", key="transcribe-bleep-yt")

a, col0, b = st.columns([1, 20, 1])
colo1, colo2 = st.columns([3, 3])
