// phone/src/operator.ts

import Buzz from "./buzz/index.ts"
import type { Playback } from "./buzz/utils.ts"
import { Agent } from "./agent/index.ts"
import { searchWeb } from "./agent/tools.ts"
import { getSound, WaitingSounds } from "./utils/waiting-sounds.ts"

/**
 * Runs the phone loop: plays a dialtone until the microphone level crosses the
 * voice-activity threshold, then connects to the agent and forwards microphone
 * audio to it until the agent disconnects, at which point the dialtone resumes.
 *
 * The returned promise settles only when the recorder stream ends; shutdown is
 * otherwise driven by SIGINT, which stops all playback and the agent and exits.
 *
 * @param agentId - Agent identifier passed through to `Agent`.
 * @param apiKey - API key passed through to `Agent`.
 */
const runPhoneSystem = async (agentId: string, apiKey: string) => {
  console.log("📞 Phone System Starting\n")
  await Buzz.setVolume(0.4)

  const recorder = await Buzz.defaultRecorder()
  const player = await Buzz.defaultPlayer()

  const agent = new Agent({
    agentId,
    apiKey,
    tools: {
      search_web: (args: { query: string }) => searchWeb(args.query),
    },
  })

  // Conversation state is declared before the event handler that mutates it so
  // the handler can never observe these bindings in their temporal dead zone.
  type State = "WAITING_FOR_VOICE" | "IN_CONVERSATION"
  let state: State = "WAITING_FOR_VOICE"
  let preConnectionBuffer: Uint8Array[] = []

  let currentDialtone: Playback | undefined
  let currentBackgroundNoise: Playback | undefined
  let streamPlayback = player.playStream()
  // NOTE(review): waitingIndicator keeps the stream it was constructed with;
  // when `streamPlayback` is replaced below it is not told about the new one.
  // Confirm against WaitingSounds whether that matters.
  const waitingIndicator = new WaitingSounds(player, streamPlayback)

  /** Stops the background noise (if any) and starts the dialtone. */
  const startDialtone = async () => {
    console.log("🔊 Playing dialtone (waiting for speech)...\n")
    await currentBackgroundNoise?.stop()
    currentBackgroundNoise = undefined
    currentDialtone = await player.playTone([350, 440], Infinity)
  }

  /** Stops the dialtone (if any) and starts the looping background noise. */
  const stopDialtone = async () => {
    await currentDialtone?.stop()
    currentDialtone = undefined
    currentBackgroundNoise = await player.play(getSound("background"), { repeat: true })
  }

  // Set up agent event listeners
  agent.events.connect(async (event) => {
    switch (event.type) {
      case "connected":
        console.log("✅ Connected to AI agent\n")
        break

      case "user_transcript":
        console.log(`👤 You: ${event.transcript}`)
        break

      case "agent_response":
        console.log(`🤖 Agent: ${event.response}`)
        break

      case "audio": {
        // Block scope keeps the lexical declaration local to this case.
        await waitingIndicator.stop()
        const audioBuffer = Buffer.from(event.audioBase64, "base64")
        streamPlayback.write(audioBuffer)
        break
      }

      case "interruption":
        console.log("🛑 User interrupted")
        streamPlayback?.stop()
        streamPlayback = player.playStream() // Reset playback stream
        break

      case "tool_call":
        waitingIndicator.start()
        console.log(`🔧 Tool call: ${event.name}(${JSON.stringify(event.args)})`)
        break

      case "tool_result":
        console.log(`✅ Tool result: ${JSON.stringify(event.result)}`)
        break

      case "tool_error":
        console.error(`❌ Tool error: ${event.error}`)
        break

      case "disconnected":
        console.log("\n👋 Conversation ended, returning to dialtone\n")
        streamPlayback?.stop()
        // Recreate the stream (as on interruption) so the next conversation
        // does not write agent audio into an already-stopped playback.
        streamPlayback = player.playStream()
        state = "WAITING_FOR_VOICE"
        try {
          await startDialtone()
        } catch (error) {
          console.error("Failed to restart dialtone:", error)
        }
        break

      case "error":
        console.error("Agent error:", event.error)
        break
    }
  })

  const recording = recorder.start()
  const audioStream = recording.stream()
  console.log("🎤 Recording started\n")

  /** Switches from dialtone to a live agent session and flushes buffered audio. */
  const startConversation = async () => {
    state = "IN_CONVERSATION"
    // Awaited so a failure surfaces here and so the background-noise handle is
    // recorded before any later startDialtone() tries to stop it.
    await stopDialtone()
    await agent.start()

    // Send pre-buffered audio
    for (const chunk of preConnectionBuffer) {
      agent.sendAudio(chunk)
    }
    preConnectionBuffer = []
  }

  /** Stops all playback and the agent, then exits the process. */
  const cleanup = async () => {
    console.log("\n\n🛑 Shutting down phone system...")
    try {
      await currentDialtone?.stop()
      await currentBackgroundNoise?.stop()
      await streamPlayback?.stop()
      await agent.stop()
    } catch (error) {
      // Still exit: with a SIGINT listener installed, the process no longer
      // terminates on Ctrl-C by itself.
      console.error("Error during shutdown:", error)
      process.exit(1)
    }
    process.exit(0)
  }

  // Must be registered before the audio loop: the loop below only finishes
  // when the recorder stream ends, so anything placed after it would not run
  // while the phone is in use.
  process.on("SIGINT", cleanup)

  await startDialtone()

  const vadThreshold = 5000
  const maxPreBufferChunks = 4 // Keep ~1 second of audio before speech detection

  for await (const chunk of audioStream) {
    if (state === "WAITING_FOR_VOICE") {
      // Keep a rolling buffer of recent audio
      preConnectionBuffer.push(chunk)
      if (preConnectionBuffer.length > maxPreBufferChunks) {
        preConnectionBuffer.shift()
      }

      const rms = Buzz.calculateRMS(chunk)
      if (rms > vadThreshold) {
        console.log(`🗣️  Speech detected! (RMS: ${Math.round(rms)})`)
        await startConversation()
      }
    } else if (state === "IN_CONVERSATION") {
      agent.sendAudio(chunk)
    }
  }
}

// Entry point: both credentials are read from the environment and the process
// exits with status 1 if either is missing. Error messages name the variable
// only; they must never contain credential values.
const apiKey = process.env.ELEVEN_API_KEY
const agentId = process.env.ELEVEN_AGENT_ID

if (!apiKey) {
  console.error("❌ Error: ELEVEN_API_KEY environment variable is required")
  process.exit(1)
}

if (!agentId) {
  console.error("❌ Error: ELEVEN_AGENT_ID environment variable is required")
  console.error("   Create an agent at https://elevenlabs.io/app/conversational-ai")
  process.exit(1)
}

await runPhoneSystem(agentId, apiKey)