Gemini Live Integration
The Gemini Live integration shows how to use Sentinel with Google's Gemini Live API for real-time voice conversations.
Installation
Quick Start
import asyncio
import os

from google import genai
from thymia_sentinel import SentinelClient

# Gemini client; the API key is read from the GEMINI_API_KEY environment
# variable and a KeyError is raised at import time if it is not set.
client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
async def run():
    """Run a Gemini Live voice session with Sentinel attached.

    Creates and connects a Sentinel client, opens the Gemini Live session,
    and runs the ``listen_audio``, ``send_realtime``, ``receive_audio`` and
    ``play_audio`` coroutines concurrently in a task group. The Sentinel
    connection is always closed on the way out, including when the session
    or one of the tasks raises or is cancelled.
    """
    sentinel = SentinelClient(
        user_label="user-123",
        sample_rate=24000,  # Gemini uses 24kHz
        policies=["demo_wellbeing_awareness"],
        on_policy_result=handle_policy_result,
    )
    await sentinel.connect()
    try:
        async with client.aio.live.connect(model=MODEL, config=CONFIG) as session:
            async with asyncio.TaskGroup() as tg:
                tg.create_task(listen_audio(sentinel))
                tg.create_task(send_realtime(session))
                tg.create_task(receive_audio(session, sentinel))
                tg.create_task(play_audio())
    finally:
        # Release the Sentinel connection even if the session or a task failed.
        await sentinel.close()
Audio Configuration
Gemini Live uses a 24 kHz sample rate. Configure Sentinel to match:
# Sample rate in Hz; the same constant is used for microphone capture and
# for the Sentinel client so both agree on the audio format.
SAMPLE_RATE = 24000
sentinel = SentinelClient(
sample_rate=SAMPLE_RATE, # Match Gemini's rate
# ...
)
Capturing Audio
async def listen_audio(sentinel):
    """Capture microphone and send to both Gemini and Sentinel.

    Opens a mono, 16-bit input stream at ``SAMPLE_RATE`` and forwards every
    chunk of ``CHUNK_SIZE`` frames to the Gemini microphone queue and to
    Sentinel. Runs until it is cancelled or a read/send raises; the stream
    is closed on the way out.

    Args:
        sentinel: Connected Sentinel client that receives the user audio.
    """
    audio_stream = pya.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=SAMPLE_RATE,
        input=True,
    )
    # Gemini's Live API docs ask for the input sample rate to be conveyed in
    # the blob MIME type; a bare "audio/pcm" does not say the mic is 24 kHz.
    mime_type = f"audio/pcm;rate={SAMPLE_RATE}"
    try:
        while True:
            # The read blocks, so run it in a worker thread to keep the
            # event loop free for the other tasks.
            data = await asyncio.to_thread(audio_stream.read, CHUNK_SIZE)
            # Send to Gemini queue
            await audio_queue_mic.put({"data": data, "mime_type": mime_type})
            # Send to Sentinel
            await sentinel.send_user_audio(data)
    finally:
        # Release the audio device when the task is cancelled or fails.
        audio_stream.close()
Capturing Transcripts
Gemini provides input and output transcriptions when they are enabled in the session configuration (see Gemini Configuration below):
async def receive_audio(session, sentinel):
    """Forward Gemini transcripts and agent audio to Sentinel.

    Transcript fragments are buffered per speaker. The user buffer is
    flushed as soon as the agent starts speaking, and both buffers are
    flushed when the turn completes. Agent audio bytes are forwarded as
    they arrive.

    ``session.receive()`` yields the messages of a single model turn and
    then stops, so it is called again in an outer loop to keep processing
    for the whole conversation. The coroutine runs until it is cancelled or
    the session raises.

    Args:
        session: Connected Gemini Live session.
        sentinel: Connected Sentinel client.
    """
    user_buffer = []
    agent_buffer = []
    while True:
        async for response in session.receive():
            sc = response.server_content
            if not sc:
                continue

            # User transcript (input transcription)
            if sc.input_transcription and sc.input_transcription.text:
                user_buffer.append(sc.input_transcription.text)

            # Agent transcript (output transcription)
            if sc.output_transcription and sc.output_transcription.text:
                # Flush user transcript first so Sentinel sees the user's
                # words before the agent's reply.
                if user_buffer:
                    await sentinel.send_user_transcript("".join(user_buffer))
                    user_buffer.clear()
                agent_buffer.append(sc.output_transcription.text)

            # Agent audio
            if sc.model_turn:
                for part in sc.model_turn.parts:
                    if part.inline_data and isinstance(part.inline_data.data, bytes):
                        await sentinel.send_agent_audio(part.inline_data.data)

            # Turn complete - flush buffers
            if sc.turn_complete:
                if user_buffer:
                    await sentinel.send_user_transcript("".join(user_buffer))
                    user_buffer.clear()
                if agent_buffer:
                    await sentinel.send_agent_transcript("".join(agent_buffer))
                    agent_buffer.clear()
Injecting Safety Actions
Inject actions as user turns:
async def apply_recommended_action(action: str, session):
    """Send a recommended action to the model as a completed user turn.

    Args:
        action: Guidance text; it is prefixed with ``[SAFETY GUIDANCE]: ``.
        session: Live session exposing ``send_client_content``.
    """
    guidance_part = {"text": f"[SAFETY GUIDANCE]: {action}"}
    user_turn = {"role": "user", "parts": [guidance_part]}
    await session.send_client_content(turns=[user_turn], turn_complete=True)
Gemini Configuration
# Model identifier passed to the Gemini Live connection.
MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"

# Live session settings: audio responses, the system prompt, and
# transcription of both sides of the conversation.
CONFIG = dict(
    response_modalities=["AUDIO"],
    system_instruction=SYSTEM_PROMPT,
    input_audio_transcription={},  # Enable input transcription
    output_audio_transcription={},  # Enable output transcription
)
Environment Variables
Running the Example
# Work from the example's directory
cd examples/gemini_live
# Create a local env file from the provided template
cp .env.example .env.local
# Edit .env.local with your API keys
# Start the agent
uv run python src/agent.py
The example will start a voice conversation using your microphone and speakers.