From 8495ce7619235d9a6c6478114cf69a077d73fdf4 Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Thu, 12 Dec 2024 06:41:33 -0800 Subject: [PATCH] Add a Bash websocket example to the cookbook (#336) * Add a Bash websocket example to the cookbook * Extra logging --- gemini-2/websockets/README.md | 1 + gemini-2/websockets/shell_websockets.sh | 48 +++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100755 gemini-2/websockets/shell_websockets.sh diff --git a/gemini-2/websockets/README.md b/gemini-2/websockets/README.md index 5274da411..429efbe5a 100644 --- a/gemini-2/websockets/README.md +++ b/gemini-2/websockets/README.md @@ -10,6 +10,7 @@ To learn about what’s new in the 2.0 model release and the new [Google GenAI S Explore Gemini 2.0’s capabilities on your own local machine. * [Live API starter script](./live_api_starter.py) \- A locally runnable Python script using websockets that supports streaming audio in and out from your machine +* [Bash Websocket example](./shell_websockets.sh) \- A bash script using [`websocat`](https://github.com/vi/websocat) to interact with the Live API in a shell context Explore Gemini 2.0’s capabilities through the following notebooks you can run through Google Colab. diff --git a/gemini-2/websockets/shell_websockets.sh b/gemini-2/websockets/shell_websockets.sh new file mode 100755 index 000000000..e59e27b88 --- /dev/null +++ b/gemini-2/websockets/shell_websockets.sh @@ -0,0 +1,48 @@ +#!/bin/bash +#set -ex + +# This script shows you how to use `websocat` to interact with the Gemini 2.0 +# Multimodal Live API. 

# Requirements:
#   * $GOOGLE_API_KEY must be set to a Gemini API key.
#   * `jq` and `websocat` must be available on $PATH:
#       $ sudo apt install jq
#       $ wget https://github.com/vi/websocat/releases/download/v1.14.0/websocat.x86_64-unknown-linux-musl
#       (or relevant binary from https://github.com/vi/websocat/releases),
#       then make it executable and put it on $PATH as `websocat`.
#
# Optional environment overrides: $HOST, $MODEL, $PROMPT.

set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

echo "HOST: ${HOST:=generativelanguage.googleapis.com}"
echo "MODEL: ${MODEL:=gemini-2.0-flash-exp}"
: "${PROMPT:=what is 10 + 10?}"
API_KEY=${GOOGLE_API_KEY:?Please set \$GOOGLE_API_KEY}

# Fail early with a clear message instead of a confusing error further down.
for tool in jq websocat; do
  command -v "$tool" >/dev/null || die "Required tool not found on \$PATH: $tool"
done

echo "Starting..."

# Define some pipes so we can separate model input and output. They live in a
# private temporary directory so that a stale FIFO from an earlier run (or an
# unrelated file with the same name) cannot get in the way.
work_dir=$(mktemp -d) || die "Could not create a temporary directory"
input_fifo="${work_dir}/gemini_input"
output_fifo="${work_dir}/gemini_output"
output_pid=''
socket_pid=''

# Stop the background processes and remove the pipes. Runs on every exit path.
cleanup() {
  local pid
  exec 3>&-  # Close our write end of the input pipe (no-op if never opened).
  for pid in "$socket_pid" "$output_pid"; do
    [[ -n "$pid" ]] || continue
    # The process may already have exited; that is not an error here.
    kill "$pid" 2>/dev/null || true
    wait "$pid" 2>/dev/null || true
  done
  rm -rf -- "${work_dir:?}"
}
trap cleanup EXIT

mkfifo -- "$input_fifo" "$output_fifo"
echo "Pipes laid..."

# Process model output in the background.
# Uncomment this to do explicit line-by-line processing:
#while IFS= read -r line; do
#  jq <<<"$line"
#done <"$output_fifo" &

# Or use this to `jq` everything. `-n` is required with `inputs`: without it
# jq consumes the first stream event as `.` and the first message is mangled.
jq -n --unbuffered --stream 'fromstream(0|truncate_stream(inputs))' "$output_fifo" &
output_pid=$!

# Connect to Gemini (in the background), wiring the socket to the two pipes.
# `-n` stops websocat from sending a Close frame when its stdin reaches EOF.
# NOTE(review): the endpoint path (v1alpha BidiGenerateContent) follows the
# Live API WebSockets reference; confirm it against the current documentation.
# NOTE: the API key is part of the URL and therefore visible in `ps` output.
websocat -n "wss://${HOST}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key=${API_KEY}" \
  <"$input_fifo" >"$output_fifo" &
socket_pid=$!

# Make a write to a dead connection fail with an error we can report, rather
# than killing this script silently with SIGPIPE.
trap '' PIPE

# Keep one write end of the input pipe open for the whole session. Re-opening
# the FIFO for every message would let websocat see EOF between messages.
exec 3>"$input_fifo"
echo "Model connected."

# Log a JSON message (pretty-printed) and send it to the model as one line.
send_message() {
  local message=$1
  jq . <<<"$message"
  printf '%s\n' "$message" >&3 || die "Could not send message: connection closed"
}

# Build the messages with jq so that $MODEL and $PROMPT are always encoded as
# valid JSON strings, whatever characters they contain.
setup_message=$(jq -cn --arg model "models/${MODEL}" \
  '{setup: {model: $model, generation_config: {response_modalities: ["TEXT"]}}}')
prompt_message=$(jq -cn --arg text "$PROMPT" \
  '{client_content: {turn_complete: true, turns: [{role: "user", parts: [{text: $text}]}]}}')

# Issue setup handshake.
send_message "$setup_message"

# Generate something.
send_message "$prompt_message"

# Give the model a moment to answer; the EXIT trap then tears everything down.
sleep 5