From 8495ce7619235d9a6c6478114cf69a077d73fdf4 Mon Sep 17 00:00:00 2001
From: Mark McDonald <macd@google.com>
Date: Thu, 12 Dec 2024 06:41:33 -0800
Subject: [PATCH] Add a Bash websocket example to the cookbook (#336)

* Add a Bash websocket example to the cookbook
* Extra logging
---
 gemini-2/websockets/README.md           |  1 +
 gemini-2/websockets/shell_websockets.sh | 48 +++++++++++++++++++++++++
 2 files changed, 49 insertions(+)
 create mode 100755 gemini-2/websockets/shell_websockets.sh

diff --git a/gemini-2/websockets/README.md b/gemini-2/websockets/README.md
index 5274da411..429efbe5a 100644
--- a/gemini-2/websockets/README.md
+++ b/gemini-2/websockets/README.md
@@ -10,6 +10,7 @@ To learn about what’s new in the 2.0 model release and the new [Google GenAI S
 Explore Gemini 2.0’s capabilities on your own local machine.
 
 * [Live API starter script](./live_api_starter.py) \- A locally runnable Python script using websockets that supports streaming audio in and out from your machine
+* [Bash WebSocket example](./shell_websockets.sh) \- A Bash script using [`websocat`](https://github.com/vi/websocat) to interact with the Live API in a shell context
 
 Explore Gemini 2.0’s capabilities through the following notebooks you can run through Google Colab.
 
diff --git a/gemini-2/websockets/shell_websockets.sh b/gemini-2/websockets/shell_websockets.sh
new file mode 100755
index 000000000..e59e27b88
--- /dev/null
+++ b/gemini-2/websockets/shell_websockets.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#set -ex
+
+# This script shows you how to use `websocat` to interact with the Gemini 2.0
+# Multimodal Live API.
+
+# You need to set $GOOGLE_API_KEY
+# And you'll need:
+#  $ sudo apt install jq
+#  $ wget https://github.com/vi/websocat/releases/download/v1.14.0/websocat.x86_64-unknown-linux-musl
+#    (or relevant binary from https://github.com/vi/websocat/releases)
+
+echo "HOST: ${HOST:=generativelanguage.googleapis.com}"
+echo "MODEL: ${MODEL:=gemini-2.0-flash-exp}"
+API_KEY=${GOOGLE_API_KEY:?Please set \$GOOGLE_API_KEY}
+
+echo "Starting..."
+
+# Define some pipes so we can separate model input and output. Whatever way the
+# script exits, remove the pipes and stop any background jobs started below.
+trap 'rm -f gemini_{in,out}put; kill $output_pid $socket_pid 2>/dev/null' EXIT
+mkfifo gemini_{in,out}put || exit 1
+echo "Pipes laid..."
+
+# Process model output in the background.
+# Uncomment this to do explicit line-by-line processing:
+#while IFS= read -r line; do
+#  jq <<<"$line"
+#done < gemini_output &
+
+# Or use this to `jq` everything:
+jq --stream 'fromstream(0|truncate_stream(inputs))' <gemini_output &
+output_pid=$!
+echo "Output processing..."
+
+# Launch the model connection and wire it up to the pipes. The URL is quoted so
+# that `?` is not treated as a glob and the expansions are not word-split.
+websocat -n "wss://${HOST}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key=${API_KEY}" <gemini_input >gemini_output &
+socket_pid=$!
+echo "Model connected."
+
+# Issue setup handshake.
+echo '{"setup": {"model": "models/'"${MODEL}"'", "generation_config": {"response_modalities":["TEXT"]}}}' |tee >(jq) >gemini_input
+
+# Generate something.
+echo '{"client_content": { "turn_complete": true, "turns": [{"role": "user", "parts": [{"text": "what is 10 + 10?"}]}]}}' |tee >(jq) >gemini_input
+
+sleep 5