Skip to content

Commit

Permalink
Adding screen share support to live_api_starter.py
Browse files Browse the repository at this point in the history
  • Loading branch information
clementfarabet authored Dec 17, 2024
1 parent 97c0998 commit 379912b
Showing 1 changed file with 36 additions and 2 deletions.
38 changes: 36 additions & 2 deletions gemini-2/live_api_starter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.

# To install the dependencies for this script, run:
# pip install google-genai opencv-python pyaudio pillow
# pip install google-genai opencv-python pyaudio pillow mss
# And to run this script, ensure the GOOGLE_API_KEY environment
# variable is set to the key you obtained from Google AI Studio.

Expand All @@ -28,6 +28,7 @@
import cv2
import pyaudio
import PIL.Image
import mss

from google import genai

Expand All @@ -44,6 +45,9 @@

# Gemini model identifier.
MODEL = "models/gemini-2.0-flash-exp"

# Video source selected in run(): "camera" starts get_frames(), "screen"
# starts get_screen(). Any other value starts no video task at all.
MODE = "camera"
# MODE = "screen"  # uncomment (and comment out the line above) to share the screen

# API client pinned to the v1alpha API version.
client = genai.Client(http_options={"api_version": "v1alpha"})

# Generation config requesting audio as the response modality.
CONFIG = {"generation_config": {"response_modalities": ["AUDIO"]}}
Expand Down Expand Up @@ -113,6 +117,33 @@ async def get_frames(self):
# Release the VideoCapture object
cap.release()

def _get_screen(self, sct, monitor):
    """Capture one screenshot and package it as a base64-encoded JPEG.

    This call is blocking; ``get_screen`` runs it via ``asyncio.to_thread``.

    Args:
        sct: Capture object whose ``grab(monitor)`` returns a screenshot
            exposing ``rgb`` (raw RGB bytes) and ``size`` (width, height),
            e.g. an ``mss`` instance.
        monitor: Region descriptor passed straight through to ``sct.grab``.

    Returns:
        A dict with ``"mime_type"`` set to ``"image/jpeg"`` and ``"data"``
        holding the JPEG bytes as base64 text.
    """
    shot = sct.grab(monitor)

    # Build the image directly from the raw RGB buffer. Encoding to PNG and
    # decoding it again only to obtain a PIL image is a wasted
    # compress/decompress of the whole screen on every frame.
    img = PIL.Image.frombytes("RGB", shot.size, shot.rgb)

    image_io = io.BytesIO()
    img.save(image_io, format="jpeg")

    return {
        "mime_type": "image/jpeg",
        "data": base64.b64encode(image_io.getvalue()).decode(),
    }

async def get_screen(self):
    """Stream screenshots into ``self.out_queue``, roughly one per second.

    Captures ``sct.monitors[0]`` (per the mss documentation, index 0 is the
    combined area of all monitors) by running the blocking ``_get_screen``
    helper in a worker thread, then enqueues the resulting payload.

    The loop runs until the task is cancelled, or until ``_get_screen``
    returns ``None``.
    """
    # The context manager closes the capture handle when the loop ends or
    # the task is cancelled; a bare mss.mss() would leak it.
    # NOTE(review): the handle is created here but used from to_thread
    # worker threads -- confirm this is safe on every target platform.
    with mss.mss() as sct:
        monitor = sct.monitors[0]

        while True:
            frame = await asyncio.to_thread(self._get_screen, sct, monitor)
            # _get_screen currently always returns a payload; the guard lets
            # a capture helper signal end-of-stream with None.
            if frame is None:
                break

            # Throttle to about one frame per second.
            await asyncio.sleep(1.0)

            await self.out_queue.put(frame)

async def send_realtime(self):
while True:
msg = await self.out_queue.get()
Expand Down Expand Up @@ -181,7 +212,10 @@ async def run(self):
send_text_task = tg.create_task(self.send_text())
tg.create_task(self.send_realtime())
tg.create_task(self.listen_audio())
tg.create_task(self.get_frames())
if MODE == "camera":
tg.create_task(self.get_frames())
elif MODE == "screen":
tg.create_task(self.get_screen())
tg.create_task(self.receive_audio())
tg.create_task(self.play_audio())

Expand Down

0 comments on commit 379912b

Please sign in to comment.