165 lines
4.6 KiB
TypeScript
Executable File
165 lines
4.6 KiB
TypeScript
Executable File
import Buzz from "./buzz/index.ts"
|
|
import type { Playback } from "./buzz/utils.ts"
|
|
import { Agent } from "./agent/index.ts"
|
|
import { searchWeb } from "./agent/tools.ts"
|
|
import { getSound, WaitingSounds } from "./utils/waiting-sounds.ts"
|
|
|
|
/**
 * Runs the phone loop: plays a dialtone until the microphone level crosses
 * the voice-activity threshold, then starts an agent conversation and streams
 * microphone audio to the agent until it disconnects, after which the
 * dialtone resumes.
 *
 * The returned promise settles only when the recording stream ends; a SIGINT
 * handler installed before the loop stops all playback and the agent and then
 * exits the process.
 *
 * @param agentId - Agent identifier passed to the `Agent` constructor.
 * @param apiKey - API key passed to the `Agent` constructor.
 */
const runPhoneSystem = async (agentId: string, apiKey: string) => {
  console.log("📞 Phone System Starting\n")
  await Buzz.setVolume(0.4)

  const recorder = await Buzz.defaultRecorder()
  const player = await Buzz.defaultPlayer()

  const agent = new Agent({
    agentId,
    apiKey,
    tools: {
      search_web: (args: { query: string }) => searchWeb(args.query),
    },
  })

  // Declared before the event handler so the closures below never touch a
  // binding that has not been initialised yet.
  type State = "WAITING_FOR_VOICE" | "IN_CONVERSATION"
  let state: State = "WAITING_FOR_VOICE"
  let preConnectionBuffer: Uint8Array[] = []

  let currentDialtone: Playback | undefined
  let currentBackgroundNoise: Playback | undefined
  let streamPlayback = player.playStream()
  // NOTE(review): WaitingSounds receives the initial stream only; after
  // `streamPlayback` is replaced below it may still hold the old, stopped
  // stream. Confirm against WaitingSounds whether it needs the new one.
  const waitingIndicator = new WaitingSounds(player, streamPlayback)

  /** Stops the background noise (if any) and starts the idle dialtone. */
  const startDialtone = async () => {
    console.log("🔊 Playing dialtone (waiting for speech)...\n")
    await currentBackgroundNoise?.stop()
    currentBackgroundNoise = undefined
    currentDialtone = await player.playTone([350, 440], Infinity)
  }

  /** Stops the dialtone (if any) and starts the looping background sound. */
  const stopDialtone = async () => {
    await currentDialtone?.stop()
    currentDialtone = undefined
    currentBackgroundNoise = await player.play(getSound("background"), { repeat: true })
  }

  /** Switches to conversation mode, connects the agent and flushes the pre-buffer. */
  const startConversation = async () => {
    // Intentionally not awaited so the audio switch does not delay connecting
    // to the agent; the rejection is still handled instead of left floating.
    stopDialtone().catch((error: unknown) => {
      console.error("Failed to switch from dialtone to background noise:", error)
    })

    state = "IN_CONVERSATION"
    await agent.start()

    // Send pre-buffered audio so the start of the utterance is not lost
    for (const chunk of preConnectionBuffer) {
      agent.sendAudio(chunk)
    }
    preConnectionBuffer = []
  }

  // Set up agent event listeners
  agent.events.connect(async (event) => {
    switch (event.type) {
      case "connected":
        console.log("✅ Connected to AI agent\n")
        break

      case "user_transcript":
        console.log(`👤 You: ${event.transcript}`)
        break

      case "agent_response":
        console.log(`🤖 Agent: ${event.response}`)
        break

      case "audio": {
        await waitingIndicator.stop()
        const audioBuffer = Buffer.from(event.audioBase64, "base64")
        streamPlayback.write(audioBuffer)
        break
      }

      case "interruption":
        console.log("🛑 User interrupted")
        streamPlayback?.stop()
        streamPlayback = player.playStream() // Reset playback stream
        break

      case "tool_call":
        waitingIndicator.start()
        console.log(`🔧 Tool call: ${event.name}(${JSON.stringify(event.args)})`)
        break

      case "tool_result":
        console.log(`✅ Tool result: ${JSON.stringify(event.result)}`)
        break

      case "tool_error":
        console.error(`❌ Tool error: ${event.error}`)
        break

      case "disconnected":
        console.log("\n👋 Conversation ended, returning to dialtone\n")
        // A tool call may still be pending; do not leave its waiting sound
        // playing over the dialtone.
        await waitingIndicator.stop()
        streamPlayback?.stop()
        // Recreate the stream (as on interruption) so the next conversation
        // does not write agent audio into a stopped playback.
        streamPlayback = player.playStream()
        state = "WAITING_FOR_VOICE"
        await startDialtone()
        break

      case "error":
        console.error("Agent error:", event.error)
        break
    }
  })

  /** Stops all playback and the agent, then terminates the process. */
  const cleanup = async () => {
    console.log("\n\n🛑 Shutting down phone system...")
    let exitCode = 0
    try {
      await currentDialtone?.stop()
      await currentBackgroundNoise?.stop()
      await streamPlayback?.stop()
      await agent.stop()
    } catch (error: unknown) {
      // Still exit: with a SIGINT listener installed the process no longer
      // terminates on Ctrl-C by itself.
      console.error("Cleanup failed:", error)
      exitCode = 1
    }
    process.exit(exitCode)
  }

  // Must be registered BEFORE the audio loop below: that loop only finishes
  // when the recording stream ends, so anything after it is not reached
  // while the system is running.
  process.on("SIGINT", () => {
    void cleanup()
  })

  const recording = recorder.start()
  const audioStream = recording.stream()
  console.log("🎤 Recording started\n")

  await startDialtone()

  const vadThreshold = 5000 // RMS level above which a chunk counts as speech
  const maxPreBufferChunks = 4 // Keep ~1 second of audio before speech detection

  for await (const chunk of audioStream) {
    if (state === "WAITING_FOR_VOICE") {
      // Keep a rolling buffer of recent audio
      preConnectionBuffer.push(chunk)
      if (preConnectionBuffer.length > maxPreBufferChunks) {
        preConnectionBuffer.shift()
      }

      const rms = Buzz.calculateRMS(chunk)
      if (rms > vadThreshold) {
        console.log(`🗣️ Speech detected! (RMS: ${Math.round(rms)})`)
        await startConversation()
      }
    } else if (state === "IN_CONVERSATION") {
      agent.sendAudio(chunk)
    }
  }
}
|
|
|
|
// Entry point: read credentials from the environment, fail fast with a clear
// message when one is missing, then run the phone system.
// Credentials must only ever come from the environment — never paste real
// IDs or keys into this file.
const apiKey = process.env.ELEVEN_API_KEY
const agentId = process.env.ELEVEN_AGENT_ID

if (!apiKey) {
  console.error("❌ Error: ELEVEN_API_KEY environment variable is required")
  process.exit(1)
}

if (!agentId) {
  console.error("❌ Error: ELEVEN_AGENT_ID environment variable is required")
  console.error(" Create an agent at https://elevenlabs.io/app/conversational-ai")
  process.exit(1)
}

await runPhoneSystem(agentId, apiKey)
|