phone/src/operator.ts
2025-11-18 14:33:25 -08:00

165 lines
4.6 KiB
TypeScript
Executable File

import Buzz from "./buzz/index.ts"
import type { Playback } from "./buzz/utils.ts"
import { Agent } from "./agent/index.ts"
import { searchWeb } from "./agent/tools.ts"
import { getSound, WaitingSounds } from "./utils/waiting-sounds.ts"
/**
 * Runs the phone loop: plays a dialtone until speech is detected on the
 * default recorder, then forwards microphone audio to the agent and plays the
 * agent's audio back. When the agent disconnects, the dialtone resumes and the
 * loop waits for speech again.
 *
 * A SIGINT handler is installed that stops all playback and the agent before
 * exiting the process.
 *
 * @param agentId - Agent identifier passed through to the `Agent` constructor.
 * @param apiKey - API key passed through to the `Agent` constructor.
 * @returns A promise that resolves once the recorder's audio stream ends.
 */
const runPhoneSystem = async (agentId: string, apiKey: string) => {
  type State = "WAITING_FOR_VOICE" | "IN_CONVERSATION"

  // RMS level (as returned by Buzz.calculateRMS) above which a chunk counts as speech.
  const vadThreshold = 5000
  const maxPreBufferChunks = 4 // Keep ~1 second of audio before speech detection

  console.log("📞 Phone System Starting\n")
  await Buzz.setVolume(0.4)
  const recorder = await Buzz.defaultRecorder()
  const player = await Buzz.defaultPlayer()
  const agent = new Agent({
    agentId,
    apiKey,
    tools: {
      search_web: (args: { query: string }) => searchWeb(args.query),
    },
  })

  // All mutable state is declared before the event handler so the handler can
  // never observe these bindings in their temporal dead zone.
  let state: State = "WAITING_FOR_VOICE"
  let preConnectionBuffer: Uint8Array[] = []
  let currentDialtone: Playback | undefined
  let currentBackgroundNoise: Playback | undefined
  let streamPlayback = player.playStream()
  // NOTE(review): WaitingSounds receives the initial stream only; after
  // `streamPlayback` is replaced below it may still hold the old one — confirm
  // against the WaitingSounds implementation.
  const waitingIndicator = new WaitingSounds(player, streamPlayback)

  /** Builds a rejection handler for fire-and-forget calls so failures are logged, not lost. */
  const logFailure = (action: string) => (error: unknown) => {
    console.error(`Failed to ${action}:`, error)
  }

  /** Stops the background noise (if any) and plays the dialtone indefinitely. */
  const startDialtone = async () => {
    console.log("🔊 Playing dialtone (waiting for speech)...\n")
    await currentBackgroundNoise?.stop()
    currentBackgroundNoise = undefined
    currentDialtone = await player.playTone([350, 440], Infinity)
  }

  /** Stops the dialtone (if any) and starts the looping background sound. */
  const stopDialtone = async () => {
    await currentDialtone?.stop()
    currentDialtone = undefined
    currentBackgroundNoise = await player.play(getSound("background"), { repeat: true })
  }

  /** Switches to conversation mode, connects the agent and flushes the pre-buffered audio. */
  const startConversation = async () => {
    // Deliberately not awaited so the sound swap does not delay connecting;
    // failures are still reported instead of becoming unhandled rejections.
    void stopDialtone().catch(logFailure("stop dialtone"))
    state = "IN_CONVERSATION"
    await agent.start()
    // Send pre-buffered audio
    for (const chunk of preConnectionBuffer) {
      agent.sendAudio(chunk)
    }
    preConnectionBuffer = []
  }

  /** Stops every playback and the agent, then exits the process. */
  const cleanup = async () => {
    console.log("\n\n🛑 Shutting down phone system...")
    try {
      await currentDialtone?.stop()
      await currentBackgroundNoise?.stop()
      await streamPlayback?.stop()
      await agent.stop()
    } catch (error: unknown) {
      console.error("Error during shutdown:", error)
    } finally {
      // Exit even if one of the stops failed, so Ctrl-C always terminates.
      process.exit(0)
    }
  }

  // Must be registered before the audio loop below: that loop only finishes
  // when the recorder stream ends, so anything after it is effectively unreachable.
  process.on("SIGINT", () => {
    void cleanup()
  })

  // Set up agent event listeners
  agent.events.connect(async (event) => {
    switch (event.type) {
      case "connected":
        console.log("✅ Connected to AI agent\n")
        break
      case "user_transcript":
        console.log(`👤 You: ${event.transcript}`)
        break
      case "agent_response":
        console.log(`🤖 Agent: ${event.response}`)
        break
      case "audio": {
        await waitingIndicator.stop()
        const audioBuffer = Buffer.from(event.audioBase64, "base64")
        streamPlayback.write(audioBuffer)
        break
      }
      case "interruption":
        console.log("🛑 User interrupted")
        streamPlayback?.stop()
        streamPlayback = player.playStream() // Reset playback stream
        break
      case "tool_call":
        waitingIndicator.start()
        console.log(`🔧 Tool call: ${event.name}(${JSON.stringify(event.args)})`)
        break
      case "tool_result":
        console.log(`✅ Tool result: ${JSON.stringify(event.result)}`)
        break
      case "tool_error":
        console.error(`❌ Tool error: ${event.error}`)
        break
      case "disconnected":
        console.log("\n👋 Conversation ended, returning to dialtone\n")
        streamPlayback?.stop()
        // Same reset as on interruption, so the next conversation's audio is
        // not written to a stopped stream.
        streamPlayback = player.playStream()
        state = "WAITING_FOR_VOICE"
        void startDialtone().catch(logFailure("restart dialtone"))
        break
      case "error":
        console.error("Agent error:", event.error)
        break
    }
  })

  const recording = recorder.start()
  const audioStream = recording.stream()
  console.log("🎤 Recording started\n")

  await startDialtone()

  for await (const chunk of audioStream) {
    if (state === "WAITING_FOR_VOICE") {
      // Keep a rolling buffer of recent audio
      preConnectionBuffer.push(chunk)
      if (preConnectionBuffer.length > maxPreBufferChunks) {
        preConnectionBuffer.shift()
      }
      const rms = Buzz.calculateRMS(chunk)
      if (rms > vadThreshold) {
        console.log(`🗣️ Speech detected! (RMS: ${Math.round(rms)})`)
        await startConversation()
      }
    } else if (state === "IN_CONVERSATION") {
      agent.sendAudio(chunk)
    }
  }
}
// Entry point: credentials are read from the environment only and are never
// hard-coded in this file. Startup is refused when either one is missing.
const apiKey = process.env.ELEVEN_API_KEY
const agentId = process.env.ELEVEN_AGENT_ID
if (!apiKey) {
  console.error("❌ Error: ELEVEN_API_KEY environment variable is required")
  process.exit(1)
}
if (!agentId) {
  console.error("❌ Error: ELEVEN_AGENT_ID environment variable is required")
  console.error(" Create an agent at https://elevenlabs.io/app/conversational-ai")
  process.exit(1)
}
// Both values are narrowed to `string` here because process.exit never returns.
await runPhoneSystem(agentId, apiKey)