phone/src/agent/README.md
2025-11-20 18:18:47 -08:00

4.2 KiB

Agent

A clean, reusable wrapper for the ElevenLabs conversational AI WebSocket protocol. It exposes Signal-based events and provides simple tool registration.

Basic Usage

import { Agent } from './pi/agent'
import Buzz from './pi/buzz'

const agent = new Agent({
  agentId: process.env.ELEVEN_AGENT_ID!,
  apiKey: process.env.ELEVEN_API_KEY!,
  tools: {
    search_web: async (args) => {
      return { results: [`Result for ${args.query}`] }
    }
  }
})

// Set up event handlers
const player = await Buzz.player()
let playback = player.playStream()

agent.events.connect((event) => {
  if (event.type === 'audio') {
    const audioBuffer = Buffer.from(event.audioBase64, 'base64')
    if (!playback.isPlaying) playback = player.playStream()
    playback.write(audioBuffer)
  }
  else if (event.type === 'interruption') {
    playback.stop()
  }
  else if (event.type === 'user_transcript') {
    console.log(`User: ${event.transcript}`)
  }
  else if (event.type === 'agent_response') {
    console.log(`Agent: ${event.response}`)
  }
})

// Start conversation
await agent.start()

// Continuously stream audio
const recorder = await Buzz.recorder()
const recording = recorder.start()
for await (const chunk of recording.stream()) {
  agent.sendAudio(chunk)
}

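VAD Pattern

Use voice activity detection (VAD) to defer connecting until someone actually speaks: keep recent audio in a rolling buffer, start the agent once a chunk's RMS level exceeds a threshold, then flush the buffered audio so the beginning of the utterance is not lost. In the example below, vadThreshold, agentId, apiKey, tools, eventHandler, and RollingBuffer are assumed to be defined elsewhere.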

const recorder = await Buzz.recorder()
const recording = recorder.start()
const buffer = new RollingBuffer()

let agent: Agent | undefined

for await (const chunk of recording.stream()) {
  if (!agent) {
    // Waiting for voice
    buffer.add(chunk)
    const rms = Buzz.calculateRMS(chunk)

    if (rms > vadThreshold) {
      // Speech detected! Start conversation
      agent = new Agent({ agentId, apiKey, tools })
      agent.events.connect(eventHandler)
      await agent.start()

      // Send buffered audio
      const buffered = buffer.flush()
      agent.sendAudio(buffered)
    }
  } else {
    // In conversation - stream continuously
    agent.sendAudio(chunk)
  }
}

API

Constructor

new Agent({
  agentId: string,
  apiKey: string,
  tools?: {
    [toolName: string]: (args: Record<string, unknown>) => Promise<unknown> | unknown
  },
  conversationConfig?: {
    agentConfig?: object,
    ttsConfig?: object,
    customLlmExtraBody?: { temperature?: number, max_tokens?: number },
    dynamicVariables?: Record<string, string | number | boolean>
  }
})

Methods

  • await agent.start() - Connect WebSocket and start conversation
  • agent.sendAudio(chunk: Uint8Array) - Send an audio chunk (chunks sent before the connection opens are buffered and sent once it does)
  • agent.sendMessage(text: string) - Send text message to agent
  • agent.sendContextUpdate(text: string) - Update context during conversation
  • await agent.stop() - Close WebSocket and clean up

Properties

  • agent.events: Signal<AgentEvent> - Connect to receive all events
  • agent.isConnected: boolean - Current connection state
  • agent.conversationId?: string - Available after the 'connected' event

Events

All events are emitted through agent.events:

Connection

  • { type: 'connected', conversationId, audioFormat }
  • { type: 'disconnected' }
  • { type: 'error', error }

Conversation

  • { type: 'user_transcript', transcript }
  • { type: 'agent_response', response }
  • { type: 'agent_response_correction', original, corrected }
  • { type: 'tentative_agent_response', response }
  • { type: 'audio', audioBase64, eventId }
  • { type: 'interruption', eventId }

Tools

  • { type: 'tool_call', name, args, callId }
  • { type: 'tool_result', name, result, callId }
  • { type: 'tool_error', name, error, callId }

Optional

  • { type: 'vad_score', score }
  • { type: 'ping', eventId, pingMs }

Design Principles

  • Generic: Not tied to phone systems, works in any context
  • Flexible audio: You control when to send audio; Agent just handles the WebSocket
  • Event-driven: All communication goes through Signal events; failures are reported as events (e.g. 'error', 'tool_error') rather than thrown
  • Simple tools: Just pass a function map to constructor
  • Automatic buffering: Sends buffered audio when connection opens
  • Automatic chunking: Handles 8000-byte chunking internally

See Also

  • Design doc: docs/plans/2025-01-16-agent-refactor-design.md
  • Original implementation: pi/agent/old-index.ts