diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..25fa621 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "typescript.tsdk": "node_modules/typescript/lib" +} diff --git a/baresip/config b/baresip/config index 9485572..26b8161 100644 --- a/baresip/config +++ b/baresip/config @@ -6,7 +6,6 @@ # Core poll_method epoll # poll, select, epoll .. -ring_aufile none # Call call_local_timeout 120 @@ -18,6 +17,7 @@ audio_source alsa,default audio_alert none audio_alert_enable no audio_level no +ring_aufile /dev/null ausrc_format s16 # s16, float, .. auplay_format s16 # s16, float, .. auenc_format s16 # s16, float, .. diff --git a/bun.lock b/bun.lock index b555245..0fb9ba4 100644 --- a/bun.lock +++ b/bun.lock @@ -6,10 +6,11 @@ "dependencies": { "hono": "^4.10.4", "openai": "^6.9.0", - "robot3": "^1.2.0", + "robot3": "1.1.1", }, "devDependencies": { "@types/bun": "latest", + "prettier": "^3.6.2", }, "peerDependencies": { "typescript": "^5", @@ -21,17 +22,19 @@ "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], - "@types/react": ["@types/react@19.2.5", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-keKxkZMqnDicuvFoJbzrhbtdLSPhj/rZThDlKWCDbgXmUg0rEUFtRssDXKYmtXluZlIqiC5VqkCgRwzuyLHKHw=="], + "@types/react": ["@types/react@19.2.6", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-p/jUvulfgU7oKtj6Xpk8cA2Y1xKTtICGpJYeJXz2YVO2UcvjQgeRMLDGfDeqeRW2Ta+0QNFwcc8X3GH8SxZz6w=="], "bun-types": ["bun-types@1.3.2", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-i/Gln4tbzKNuxP70OWhJRZz1MRfvqExowP7U6JKoI8cntFrtxg7RJK3jvz7wQW54UuvNC8tbKHHri5fy74FVqg=="], - "csstype": ["csstype@3.2.2", "", {}, "sha512-D80T+tiqkd/8B0xNlbstWDG4x6aqVfO52+OlSUNIdkTvmNw0uQpJLeos2J/2XvpyidAFuTPmpad+tUxLndwj6g=="], + "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="], "hono": ["hono@4.10.6", "", {}, "sha512-BIdolzGpDO9MQ4nu3AUuDwHZZ+KViNm+EZ75Ae55eMXMqLVhDFqEMXxtUe9Qh8hjL+pIna/frs2j6Y2yD5Ua/g=="], - "openai": ["openai@6.9.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-n2sJRYmM+xfJ0l3OfH8eNnIyv3nQY7L08gZQu3dw6wSdfPtKAk92L83M2NIP5SS8Cl/bsBBG3yKzEOjkx0O+7A=="], + "openai": ["openai@6.9.1", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-vQ5Rlt0ZgB3/BNmTa7bIijYFhz3YBceAA3Z4JuoMSBftBF9YqFHIEhZakSs+O/Ad7EaoEimZvHxD5ylRjN11Lg=="], - "robot3": ["robot3@1.2.0", "", {}, "sha512-Xin8KHqCKrD9Rqk1ZzZQYjsb6S9DRggcfwBqnVPeM3DLtNCJLxWWTrPJDYm3E+ZiTO7H3VMdgyPSkIbuYnYP2Q=="], + "prettier": ["prettier@3.6.2", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ=="], + + "robot3": ["robot3@1.1.1", "", {}, "sha512-kuD0oQg2KUE74FCQ1a5uoRsEJ/bUKrU1D3vnluop9X7LSiGLndejQgjUEcMqJMVzUA836HSXhtY7XNtQiPTCLQ=="], "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], diff --git a/package.json b/package.json index 8c0ca93..cc164c2 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,8 @@ "start": "bun run src/operator.ts" }, "devDependencies": { - "@types/bun": "latest" + "@types/bun": "latest", + "prettier": "^3.6.2" }, "peerDependencies": { "typescript": "^5" @@ -15,7 +16,7 @@ "dependencies": { "hono": "^4.10.4", "openai": "^6.9.0", - "robot3": "^1.2.0" + "robot3": "1.1.1" }, "prettier": { "semi": false, diff --git a/scripts/bootstrap.ts b/scripts/bootstrap.ts index de73d65..905c196 100755 --- a/scripts/bootstrap.ts +++ b/scripts/bootstrap.ts @@ -21,15 +21,18 @@ const WEB_SERVICE_FILE = "/etc/systemd/system/phone-web.service" console.log(`Install directory: ${INSTALL_DIR}`) -console.log("\nStep 1: Ensuring directory exists...") +console.log("\nEnsuring directory exists...") await $`mkdir -p ${INSTALL_DIR}` console.log(`✓ Directory ready: ${INSTALL_DIR}`) -console.log("\nStep 2: Installing dependencies...") +console.log("\nInstalling dependencies...") await $`cd ${INSTALL_DIR} && bun install` console.log(`✓ Dependencies installed`) -console.log("\nStep 3: Installing systemd services...") +console.log("\nInstalling Baresip...") +await $`sudo apt install -y baresip` + +console.log("\nInstalling systemd services...") // Find where bun is installed const bunPath = await $`which bun` .quiet() @@ -87,7 +90,7 @@ await $`systemctl enable phone-ap.service` await $`systemctl enable phone-web.service` console.log("✓ Services enabled") -console.log("\nStep 4: Starting the services...") +console.log("\nStarting the services...") await $`systemctl start phone-ap.service` await $`systemctl start phone-web.service` console.log("✓ Services started") diff --git a/sounds/stalling/sigh2.wav b/sounds/stalling/sigh2.wav deleted file mode 100644 index 365a6d3..0000000 Binary files a/sounds/stalling/sigh2.wav and /dev/null differ diff --git a/sounds/typing/typing2.wav b/sounds/typing/typing.wav similarity index 100% rename from sounds/typing/typing2.wav rename to sounds/typing/typing.wav diff --git a/sounds/typing/typing1.wav b/sounds/typing/typing1.wav deleted file mode 100644 index 4b48c62..0000000 Binary files a/sounds/typing/typing1.wav and /dev/null differ diff --git a/src/agent/index.ts b/src/agent/index.ts index a03a95a..913f2a2 100644 --- a/src/agent/index.ts +++ b/src/agent/index.ts @@ -255,7 +255,7 @@ export class Agent { }) } - #handleClose = (): void => { + #handleClose = (event: CloseEvent): void => { this.#cleanup() this.events.emit({ type: "disconnected" }) } diff --git a/src/buzz/index.ts b/src/buzz/index.ts index f70d0b8..fdccb1e 100644 --- a/src/buzz/index.ts +++ b/src/buzz/index.ts @@ -1,20 +1,25 @@ -import { Player } from "./player.js" -import { Recorder } from "./recorder.js" +import { Player as PlayerClass } from "./player.js" +import { Recorder as RecorderClass } from "./recorder.js" import { listDevices, calculateRMS, findDeviceByLabel, - type AudioFormat, - type Device, + type AudioFormat as AudioFormatType, + type Device as DeviceType, + type Playback as PlaybackType, + type StreamingPlayback as StreamingPlaybackType, + type StreamingRecording as StreamingRecordingType, + type FileRecording as FileRecordingType, } from "./utils.js" -const defaultPlayer = (format?: AudioFormat) => Player.create({ format }) +const defaultPlayer = (format?: AudioFormatType) => PlayerClass.create({ format }) -const player = (label: string, format?: AudioFormat) => Player.create({ label, format }) +const player = (label: string, format?: AudioFormatType) => PlayerClass.create({ label, format }) -const defaultRecorder = (format?: AudioFormat) => Recorder.create({ format }) +const defaultRecorder = (format?: AudioFormatType) => RecorderClass.create({ format }) -const recorder = (label: string, format?: AudioFormat) => Recorder.create({ label, format }) +const recorder = (label: string, format?: AudioFormatType) => + RecorderClass.create({ label, format }) const getVolumeControl = async (cardNumber?: number): Promise => { const output = cardNumber @@ -89,7 +94,13 @@ const Buzz = { calculateRMS, } +declare namespace Buzz { + export type Playback = PlaybackType + export type StreamingPlayback = StreamingPlaybackType + export type StreamingRecording = StreamingRecordingType + export type FileRecording = FileRecordingType + export type Player = PlayerClass + export type Recorder = RecorderClass +} + export default Buzz -export type { Device, AudioFormat } -export { type Player } from "./player.js" -export { type Recorder } from "./recorder.js" diff --git a/src/main.ts b/src/main.ts new file mode 100644 index 0000000..eeedf87 --- /dev/null +++ b/src/main.ts @@ -0,0 +1,26 @@ +import { runPhone } from "./phone" + +const apiKey = process.env.ELEVEN_API_KEY +const agentId = process.env.ELEVEN_AGENT_ID + +if (!apiKey) { + console.error("❌ Error: ELEVEN_API_KEY environment variable is required") + process.exit(1) +} + +if (!agentId) { + console.error( + "❌ Error: ELEVEN_AGENT_ID environELEVEN_AGENT_ID=agent_5601k4taw2cvfjzrz6snxpgeh7x8 ELEVEN_API_KEY=sk_0313740f112c5992cb62ed96c974ab19b5916f1ea172471fment variable is required", + ) + console.error(" Create an agent at https://elevenlabs.io/app/conversational-ai") + process.exit(1) +} + +console.log(`☎️ Starting phone with pid=${process.pid}`) +try { + await runPhone(agentId, apiKey) +} catch (error) { + console.error(`❌ Error starting phone: ${(error as Error).message}`) + process.exit(1) +} +console.log(`👋 Goodbye!`) diff --git a/src/phone.ts b/src/phone.ts index c2f677b..47939c5 100644 --- a/src/phone.ts +++ b/src/phone.ts @@ -1,15 +1,24 @@ -import { d, reduce, createMachine, state, transition, interpret, guard } from "robot3" +import { + d, + reduce, + createMachine, + state, + transition, + interpret, + action, + invoke, + type Service, +} from "robot3" import { Baresip } from "./sip" -import { log } from "./utils/log" +import log from "./utils/log" import { sleep } from "bun" -import { processStderr, processStdout } from "./utils/stdio" import Buzz from "./buzz" import { join } from "path" import GPIO from "./pins" import { Agent } from "./agent" import { searchWeb } from "./agent/tools" - -// TODO: Kill baresip process on exit +import { ring } from "./utils" +import { getSound, WaitingSounds } from "./utils/waiting-sounds" type CancelableTask = () => void @@ -17,422 +26,408 @@ type PhoneContext = { lastError?: string peer?: string numberDialed: number - cancelDialTone?: CancelableTask cancelRinger?: CancelableTask baresip: Baresip - startAgent: () => CancelableTask - cancelAgent?: CancelableTask + stopAgent?: CancelableTask + ringer: GPIO.Output + agentId: string + agentKey: string } -const gpio = new GPIO() -using ringer = gpio.output(17, { resetOnClose: true }) -using hook = gpio.input(27, { pull: "up", debounce: 3 }) -using rotaryInUse = gpio.input(22, { pull: "up", debounce: 3 }) -using rotaryNumber = gpio.input(23, { pull: "up", debounce: 3 }) +type PhoneService = Service + +const player = await Buzz.defaultPlayer() +let dialTonePlayback: Buzz.Playback | undefined + +export const runPhone = async (agentId: string, agentKey: string) => { + const gpio = new GPIO() + using ringer = gpio.output(17, { resetOnClose: true }) + using hook = gpio.input(27, { pull: "up", debounce: 3 }) + using rotaryInUse = gpio.input(22, { pull: "up", debounce: 3 }) + using rotaryNumber = gpio.input(23, { pull: "up", debounce: 3 }) -export const startPhone = async (agentId: string, apiKey: string) => { await Buzz.setVolume(0.4) - log.info(`📞 Hook ${hook.value}`) + log(`📞 Phone is ${hook.value ? "off hook" : "on hook"}`) - let digit = 0 + const phoneService = interpret(phoneMachine, () => {}) + listenForPhoneEvents(phoneService, hook, rotaryInUse, rotaryNumber) + const baresip = await startBaresip(phoneService, hook, ringer) + phoneService.send({ type: "config", baresip, agentId, agentKey, ringer }) - hook.onChange((event) => { - const type = event.value == 0 ? "hang_up" : "pick_up" - log.info(`📞 Hook ${event.value} sending ${type}`) - if (type === "hang_up") { - ringer.value = 1 - } else { - ringer.value = 0 - } - }) - - rotaryInUse.onChange((event) => { - if (event.value === 0) { - digit = 0 - } else { - log.info(`📞 Dialed digit: ${digit}`) - } - }) - - rotaryNumber.onChange((event) => { - if (event.value === 1) { - digit += 1 - } - }) + process.on("SIGINT", () => cleanup(baresip)) + process.on("SIGTERM", () => cleanup(baresip)) // Keep process running await new Promise(() => {}) } -const apiKey = process.env.ELEVEN_API_KEY -const agentId = process.env.ELEVEN_AGENT_ID +const listenForPhoneEvents = ( + phoneService: PhoneService, + hook: GPIO.Input, + rotaryInUse: GPIO.Input, + rotaryNumber: GPIO.Input, +) => { + hook.onChange((event) => { + const type = event.value == 0 ? "hang-up" : "pick-up" + log(`📞 Hook ${event.value} sending ${type}`) + phoneService.send({ type }) + }) -if (!apiKey) { - console.error("❌ Error: ELEVEN_API_KEY environment variable is required") - process.exit(1) + rotaryInUse.onChange((event) => { + if (event.value === 0) { + phoneService.send({ type: "dial-start" }) + } else { + phoneService.send({ type: "dial-stop" }) + } + }) + + rotaryNumber.onChange((event) => { + if (event.value === 1) { + phoneService.send({ type: "digit_increment" }) + } + }) } -if (!agentId) { - console.error( - "❌ Error: ELEVEN_AGENT_ID environELEVEN_AGENT_ID=agent_5601k4taw2cvfjzrz6snxpgeh7x8 ELEVEN_API_KEY=sk_0313740f112c5992cb62ed96c974ab19b5916f1ea172471fment variable is required" - ) - console.error(" Create an agent at https://elevenlabs.io/app/conversational-ai") - process.exit(1) +const startBaresip = async (phoneService: PhoneService, hook: GPIO.Input, ringer: GPIO.Output) => { + const baresipConfig = join(import.meta.dir, "..", "baresip") + const baresip = new Baresip(["/usr/bin/baresip", "-v", "-f", baresipConfig]) + + baresip.registrationSuccess.connect(async () => { + log("🐻 server connected") + if (hook.value === 0) { + phoneService.send({ type: "initialized" }) + } else { + phoneService.send({ type: "pick-up" }) + } + }) + + baresip.callReceived.connect(({ contact }) => { + log(`🐻 incoming call from ${contact}`) + phoneService.send({ type: "incoming-call", from: contact }) + }) + + baresip.callEstablished.connect(({ contact }) => { + log(`🐻 call established with ${contact}`) + phoneService.send({ type: "answered" }) + }) + + baresip.hungUp.connect(() => { + log("🐻 call hung up") + phoneService.send({ type: "remote-hang-up" }) + }) + + baresip.connect().catch((error) => { + log.error("🐻 connection error:", error) + phoneService.send({ type: "error", message: error.message }) + }) + + baresip.error.connect(async ({ message }) => { + log.error("🐻 error:", message) + phoneService.send({ type: "error", message }) + for (let i = 0; i < 4; i++) { + await ring(ringer, 500) + await sleep(250) + } + process.exit(1) + }) + + return baresip } -await startPhone(agentId, apiKey) - -const startBaresip = async (hook: GPIO.InputPin) => { - // const baresipConfig = join(import.meta.dir, "..", "baresip") - // const baresip = new Baresip(["/usr/bin/baresip", "-v", "-f", baresipConfig]) - - // baresip.registrationSuccess.connect(async () => { - // log.info("🐻 server connected") - // const result = await gpio.get(pins.hook) - // if (result.state === "low") { - // phoneService.send({ type: "initialized" }) - // } else { - // phoneService.send({ type: "pick_up" }) - // } - // }) - - // baresip.callReceived.connect(({ contact }) => { - // log.info(`🐻 incoming call from ${contact}`) - // phoneService.send({ type: "incoming_call", from: contact }) - // }) - - // baresip.callEstablished.connect(({ contact }) => { - // log.info(`🐻 call established with ${contact}`) - // phoneService.send({ type: "answered" }) - // }) - - // baresip.hungUp.connect(() => { - // log.info("🐻 call hung up") - // phoneService.send({ type: "remote_hang_up" }) - // }) - - // baresip.connect().catch((error) => { - // log.error("🐻 connection error:", error) - // phoneService.send({ type: "error", message: error.message }) - // }) - - // baresip.error.connect(async ({ message }) => { - // log.error("🐻 error:", message) - // phoneService.send({ type: "error", message }) - // for (let i = 0; i < 4; i++) { - // await ring(500) - // await sleep(250) - // } - // process.exit(1) - // }) - - // const agent = new Agent({ - // agentId, - // apiKey, - // tools: { - // search_web: (args: { query: string }) => searchWeb(args.query), - // }, - // }) +const cleanup = (baresip: Baresip) => { + try { + log("🛑 Shutting down, stopping agent process") + baresip.kill() + } catch (error) { + log.error("Error during shutdown:", error) + } finally { + process.exit(0) + } } -// handleAgentEvents(agent) +const handleError = (ctx: PhoneContext, event: { type: "error"; message?: string }) => { + ctx.lastError = event.message + log.error(`Phone Error: ${event.message}`) + return ctx +} -// const startAgent = () => { -// log.info("☎️ Starting agent conversation") +const config = ( + ctx: PhoneContext, + event: { baresip: Baresip; agentId: string; agentKey: string; ringer: GPIO.Output }, +) => { + ctx.baresip = event.baresip + ctx.agentId = event.agentId + ctx.agentKey = event.agentKey + ctx.ringer = event.ringer + return ctx +} -// if (agentProcess?.stdin) { -// agentProcess.stdin.write("start\n") -// } else { -// log.error("☎️ No agent process stdin available") -// phoneService.send({ type: "remote_hang_up" }) -// } +const startAgent = (service: Service, ctx: PhoneContext) => { + let streamPlayback = player.playStream() -// return () => { -// log.info("☎️ Stopping agent conversation") -// if (agentProcess?.stdin) { -// agentProcess.stdin.write("stop\n") -// } -// } -// } + const agent = new Agent({ + agentId: ctx.agentId, + apiKey: ctx.agentKey, + tools: { + search_web: (args: { query: string }) => searchWeb(args.query), + }, + }) -// const context = (initial?: Partial): PhoneContext => ({ -// numberDialed: 0, -// baresip, -// startAgent, -// ...initial, -// }) + handleAgentEvents(service, agent, streamPlayback) + const stopListening = startListening(service, agent) -// const phoneMachine = createMachine( -// "initializing", -// // prettier-ignore -// { -// initializing: state( -// transition("initialized", "idle"), -// transition("pick_up", "ready", reduce(playDialTone)), -// transition("error", "fault", reduce(handleError))), -// idle: state( -// transition("incoming_call", "incoming", reduce(incomingCall)), -// transition("pick_up", "ready", reduce(playDialTone))), -// incoming: state( -// transition("remote_hang_up", "idle", reduce(stopRinger)), -// transition("pick_up", "connected", reduce(callAnswered))), -// connected: state( -// transition("remote_hang_up", "ready", reduce(playDialTone)), -// transition("hang_up", "idle", reduce(stopCall))), -// ready: state( -// transition("dial_start", "dialing", reduce(dialStart)), -// transition("dial_timeout", "aborted", reduce(stopDialTone)), -// transition("hang_up", "idle", reduce(stopDialTone))), -// dialing: state( -// transition("dial_stop", "outgoing", reduce(makeCall), guard((ctx) => !callAgentGuard(ctx))), -// transition("dial_stop", "connectedToAgent", reduce(makeAgentCall), guard((ctx) => callAgentGuard(ctx))), -// transition("digit_increment", "dialing", reduce(digitIncrement)), -// transition("hang_up", "idle", reduce(stopDialTone))), -// outgoing: state( -// transition("start_agent", "connectedToAgent"), -// transition("answered", "connected"), -// transition("hang_up", "idle", reduce(stopCall))), -// connectedToAgent: state( -// transition("remote_hang_up", "ready", reduce(stopAgent)), -// transition("hang_up", "idle", reduce(stopAgent))), -// aborted: state( -// transition("hang_up", "idle")), -// fault: state(), -// }, -// context -// ) + ctx.stopAgent = () => { + stopListening() + dialTonePlayback?.stop() + streamPlayback.stop() + } -// const phoneService = interpret(phoneMachine, () => {}) + return ctx +} -// d._onEnter = function (machine, to, state, prevState, event) { -// log.info(`📱 ${machine.current} -> ${to} (${JSON.stringify(event)})`) -// } +const startListening = (service: Service, agent: Agent) => { + const abortAgent = new AbortController() -// gpio.monitor(pins.hook, { bias: "pull-up" }, (event) => { -// const type = event.edge === "falling" ? "hang_up" : "pick_up" -// log.info(`📞 Hook ${event.edge} sending ${type}`) -// phoneService.send({ type }) -// }) + new Promise(async (resolve) => { + const recorder = await Buzz.defaultRecorder() + const listenPlayback = recorder.start() + let backgroundNoisePlayback: Buzz.Playback | undefined + let waitingForVoice = true + const maxPreBufferChunks = 4 // Keep ~1 second of audio before speech detection -// gpio.monitor(pins.rotaryInUse, { bias: "pull-up", throttleMs: 90 }, (event) => { -// const type = event.edge === "falling" ? "dial_start" : "dial_stop" -// log.debug(`📞 Rotary in-use ${event.edge} sending ${type}`) -// phoneService.send({ type }) -// }) + let preConnectionBuffer: Uint8Array[] = [] -// gpio.monitor(pins.rotaryNumber, { bias: "pull-up", throttleMs: 90 }, (event) => { -// if (event.edge !== "rising") return -// phoneService.send({ type: "digit_increment" }) -// }) + agent.events.connect(async (event) => { + if (event.type === "disconnected") abortAgent.abort() + }) -// // Graceful shutdown handling -// const cleanup = () => { -// log.info("🛑 Shutting down, stopping agent process") -// if (agentProcess?.stdin) { -// agentProcess.stdin.write("quit\n") -// } -// } + for await (const chunk of listenPlayback.stream()) { + if (abortAgent.signal.aborted) { + agent.stop() + listenPlayback.stop() + backgroundNoisePlayback?.stop() -// process.on("SIGINT", cleanup) -// process.on("SIGTERM", cleanup) -// process.on("exit", cleanup) -// } + resolve() + break + } -// const handleAgentEvents = (agent: Agent) => { -// agent.events.connect(async (event) => { -// switch (event.type) { -// case "connected": -// console.log("✅ Connected to AI agent\n") -// break + if (waitingForVoice) { + preConnectionBuffer.push(chunk) + if (preConnectionBuffer.length > maxPreBufferChunks) { + preConnectionBuffer.shift() + } -// case "user_transcript": -// console.log(`👤 You: ${event.transcript}`) -// break + const rms = Buzz.calculateRMS(chunk) + if (rms > 5000) { + dialTonePlayback?.stop() + service.send({ type: "start-agent" }) + waitingForVoice = false -// case "agent_response": -// console.log(`🤖 Agent: ${event.response}`) -// break + backgroundNoisePlayback = await player.play(getSound("background"), { repeat: true }) -// case "audio": -// await waitingIndicator.stop() -// const audioBuffer = Buffer.from(event.audioBase64, "base64") -// streamPlayback.write(audioBuffer) -// break + await agent.start() -// case "interruption": -// console.log("🛑 User interrupted") -// streamPlayback?.stop() -// streamPlayback = player.playStream() // Reset playback stream -// break + // Send pre-buffered audio + for (const chunk of preConnectionBuffer) agent.sendAudio(chunk) + preConnectionBuffer = [] + } + } else { + agent.sendAudio(chunk) + } + } + }) -// case "tool_call": -// waitingIndicator.start(streamPlayback) -// console.log(`🔧 Tool call: ${event.name}(${JSON.stringify(event.args)})`) -// break + return () => abortAgent.abort() +} -// case "tool_result": -// console.log(`✅ Tool result: ${JSON.stringify(event.result)}`) -// break +const handleAgentEvents = ( + service: Service, + agent: Agent, + streamPlayback: Buzz.StreamingPlayback, +) => { + const waitingIndicator = new WaitingSounds(player) -// case "tool_error": -// console.error(`❌ Tool error: ${event.error}`) -// break + agent.events.connect(async (event) => { + switch (event.type) { + case "connected": + log("🤖 Connected to AI agent\n") + break -// case "disconnected": -// console.log("\n👋 Conversation ended, returning to dialtone\n") -// streamPlayback?.stop() -// state = "WAITING_FOR_VOICE" -// phoneService.send({ type: "remote_hang_up" }) -// break + case "user_transcript": + log(`🤖 You: ${event.transcript}`) + break -// case "error": -// console.error("Agent error:", event.error) -// break + case "agent_response": + log(`🤖 Agent: ${event.response}`) + break -// case "ping": -// break + case "audio": + await waitingIndicator.stop() + const audioBuffer = Buffer.from(event.audioBase64, "base64") + streamPlayback.write(audioBuffer) + break -// default: -// console.log(`😵‍💫 ${event.type}`) -// break -// } -// }) -// } + case "interruption": + log("🤖 User interrupted") + streamPlayback?.stop() + streamPlayback = player.playStream() // Reset playback stream + break -// const incomingCallRing = (): CancelableTask => { -// let abortController = new AbortController() + case "tool_call": + waitingIndicator.start(streamPlayback) + log(`🤖 Tool call: ${event.name}(${JSON.stringify(event.args)})`) + break -// const playRingtone = async () => { -// while (!abortController.signal.aborted) { -// await ring(2000, abortController.signal) -// await sleep(4000) -// } -// } -// playRingtone().catch((error) => log.error("Ringer error:", error)) + case "tool_result": + log(`🤖 Tool result: ${JSON.stringify(event.result)}`) + break -// return () => abortController.abort() -// } + case "tool_error": + console.error(`❌ Tool error: ${event.error}`) + break -// const handleError = (ctx: PhoneContext, event: { type: "error"; message?: string }) => { -// ctx.lastError = event.message -// log.error(`Phone error: ${event.message}`) -// return ctx -// } + case "disconnected": + log(`🤖 👋 Conversation ended, returning to dialtone`) + streamPlayback?.stop() + service.send({ type: "remote-hang-up" }) + break -// const incomingCall = (ctx: PhoneContext, event: { type: "incoming_call"; from?: string }) => { -// ctx.peer = event.from -// ctx.cancelRinger = incomingCallRing() -// log.info(`Incoming call from ${event.from}`) + case "error": + log.error("🤖 Agent error:", event.error) + break -// return ctx -// } + case "ping": + break -// const stopRinger = (ctx: PhoneContext) => { -// ctx.cancelRinger?.() -// ctx.cancelRinger = undefined -// return ctx -// } + default: + log.debug(`😵 Unknown agent event ${event.type}`) + break + } + }) +} -// const playDialTone = (ctx: PhoneContext) => { -// const tone = new ToneGenerator() +const stopAgent = (ctx: PhoneContext) => { + ctx.stopAgent?.() + ctx.stopAgent = undefined + return ctx +} -// tone.loopTone([350, 440]) +const incomingCall = (ctx: PhoneContext, event: { type: "incoming-call"; from?: string }) => { + ctx.peer = event.from + return ctx +} -// ctx.cancelDialTone = () => { -// tone.stop() -// } +const hangUp = (ctx: PhoneContext) => { + console.log(`📞 Hanging up call`) + ctx.baresip.hangUp() +} -// return ctx -// } +const answerCall = (ctx: PhoneContext) => { + log(`📞 Answering call`) + ctx.baresip.accept() +} -// const playOutgoingTone = () => { -// const tone = new ToneGenerator() -// let canceled = false +const makeCall = async (ctx: PhoneContext) => { + log(`Dialing number: ${ctx.numberDialed}`) + if (ctx.numberDialed === 1) { + ctx.baresip.dial("+13476229543") + } else if (ctx.numberDialed === 2) { + ctx.baresip.dial("+18109643563") + } else { + log.error(`No contact for number dialed: ${ctx.numberDialed}`) + } -// const play = async () => { -// while (!canceled) { -// await tone.playTone([440, 480], 2000) -// await sleep(4000) -// } -// } + return ctx +} -// play().catch((error) => log.error("Outgoing tone error:", error)) +const startRinger = async (ctx: PhoneContext) => { + let abortController = new AbortController() + const keepRinging = async () => { + while (!abortController.signal.aborted) { + await ring(ctx.ringer, 2000, abortController.signal) + await sleep(4000) + } + } + keepRinging().catch((error) => log.error("Ringer error:", error)) -// return () => { -// tone.stop() -// canceled = true -// } -// } + ctx.cancelRinger = () => abortController.abort() -// const dialStart = (ctx: PhoneContext) => { -// ctx.numberDialed = 0 -// ctx = stopDialTone(ctx) + return ctx +} -// return ctx -// } +const stopRinger = (ctx: PhoneContext) => { + ctx.cancelRinger?.() + ctx.cancelRinger = undefined + return ctx +} -// const makeCall = (ctx: PhoneContext) => { -// log.info(`Dialing number: ${ctx.numberDialed}`) -// if (ctx.numberDialed === 1) { -// ctx.baresip.dial("+13476229543") -// } else if (ctx.numberDialed === 2) { -// ctx.baresip.dial("+18109643563") -// } else { -// const playTone = async () => { -// const tone = new ToneGenerator() -// await tone.playTone([900], 200) -// await tone.playTone([1350], 200) -// await tone.playTone([1750], 200) -// } -// playTone().catch((error) => log.error("Error playing tone:", error)) -// } +async function startDialToneAndAgent(this: any, ctx: PhoneContext) { + ctx = await startAgent(this, ctx) -// return ctx -// } + await dialTonePlayback?.stop() + dialTonePlayback = await player.playTone([350, 440], Infinity) -// const makeAgentCall = (ctx: PhoneContext) => { -// log.info(`Calling agent`) -// ctx.cancelAgent = ctx.startAgent() + return ctx +} -// return ctx -// } +const stopDialTone = () => { + dialTonePlayback?.stop() +} -// const callAgentGuard = (ctx: PhoneContext) => { -// return ctx.numberDialed === 10 -// } +const dialStart = (ctx: PhoneContext) => { + ctx.numberDialed = 0 + return ctx +} -// const callAnswered = (ctx: PhoneContext) => { -// ctx.baresip.accept() +const digitIncrement = (ctx: PhoneContext) => { + ctx.numberDialed += 1 + return ctx +} -// ctx.cancelDialTone?.() -// ctx.cancelDialTone = undefined +const t = transition +const r = reduce +const a = action -// ctx.cancelRinger?.() -// ctx.cancelRinger = undefined +const phoneMachine = createMachine( + "booting", + // prettier-ignore + { + booting: state( + t("config", "initializing", r(config)) + ), + initializing: state( + t("initialized", "idle"), + t("pick-up", "ready"), + t("error", "fault", r(handleError))), + idle: state( + t("incoming-call", "incoming", r(incomingCall)), + t("pick-up", "ready")), + incoming: invoke(startRinger, + t("remote-hang-up", "idle", r(stopRinger)), + t("pick-up", "connected", r(stopRinger), a(answerCall))), + connected: state( + t("remote-hang-up", "ready"), + t("hang-up", "idle", a(hangUp))), + ready: invoke(startDialToneAndAgent, + t("dial-start", "dialing", a(stopDialTone), r(dialStart), a(stopAgent)), + t("hang-up", "idle", a(stopDialTone), a(stopAgent)), + t("start-agent", "connectToAgent", a(stopDialTone))), + connectToAgent: state( + t("hang-up", "idle", r(stopAgent)), + t("remote-hang-up", "ready", r(stopAgent))), + dialing: state( + t("dial-stop", "outgoing"), + t("digit_increment", "dialing", r(digitIncrement)), + t("hang-up", "idle")), + outgoing: invoke(makeCall, + t("answered", "connected"), + t("hang-up", "idle", a(hangUp))), + aborted: state( + t("hang-up", "idle")), + fault: state(), + }, +) -// return ctx -// } - -// const stopCall = (ctx: PhoneContext) => { -// ctx.baresip.hangUp() -// return ctx -// } - -// const stopAgent = (ctx: PhoneContext) => { -// log.info("🛑 Stopping agent") -// ctx.cancelAgent?.() -// ctx.cancelAgent = undefined -// return ctx -// } - -// const stopDialTone = (ctx: PhoneContext) => { -// ctx.cancelDialTone?.() -// ctx.cancelDialTone = undefined - -// return ctx -// } - -// const digitIncrement = (ctx: PhoneContext) => { -// ctx.numberDialed += 1 -// return ctx -// } +d._onEnter = function (machine, to, state, prevState, event) { + log(`📱 ${machine.current} -> ${to} (${(event as any).type})`) +} diff --git a/src/pins/index.ts b/src/pins/index.ts index 34b0606..a856869 100644 --- a/src/pins/index.ts +++ b/src/pins/index.ts @@ -82,6 +82,8 @@ namespace GPIO { export type InputOptions = Type.InputOptions export type OutputOptions = Type.OutputOptions export type InputEvent = Type.InputEvent + export type Input = import("./input").Input + export type Output = import("./output").Output } export default GPIO diff --git a/src/sip.ts b/src/sip.ts index afbe9a6..722a4fe 100644 --- a/src/sip.ts +++ b/src/sip.ts @@ -1,4 +1,4 @@ -import { log } from "./utils/log.ts" +import log from "./utils/log.ts" import { Signal } from "./utils/signal.ts" import { processStdout, processStderr } from "./utils/stdio.ts" diff --git a/src/test-operator.ts b/src/test-operator.ts index ad33fd4..dc04b9b 100755 --- a/src/test-operator.ts +++ b/src/test-operator.ts @@ -1,5 +1,4 @@ import Buzz from "./buzz/index.ts" -import type { Playback } from "./buzz/utils.ts" import { Agent } from "./agent/index.ts" import { searchWeb } from "./agent/tools.ts" import { getSound, WaitingSounds } from "./utils/waiting-sounds.ts" @@ -19,8 +18,8 @@ const runPhoneSystem = async (agentId: string, apiKey: string) => { }, }) - let currentDialtone: Playback | undefined - let currentBackgroundNoise: Playback | undefined + let currentDialtone: Buzz.Playback | undefined + let currentBackgroundNoise: Buzz.Playback | undefined let streamPlayback = player.playStream() const waitingIndicator = new WaitingSounds(player) diff --git a/src/test-pins.ts b/src/test-pins.ts index 0ff95c2..561c9e0 100644 --- a/src/test-pins.ts +++ b/src/test-pins.ts @@ -1,4 +1,4 @@ -import { GPIO } from "./pins" +import GPIO from "./pins" console.log(`kill -9 ${process.pid}`) diff --git a/src/utils/index.ts b/src/utils/index.ts index 0d0fcd6..4a06864 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -1,3 +1,5 @@ +import type GPIO from "../pins" + export const ensure = (value: T, message: string): T => { if (value === undefined || value === null) { throw new Error(message) @@ -9,3 +11,17 @@ export const ensure = (value: T, message: string): T => { export const random = (arr: ReadonlyArray): T => { return arr[Math.floor(Math.random() * arr.length)]! } + +export const ring = async (ringer: GPIO.Output, duration: number, signal?: AbortSignal) => { + try { + const endAt = performance.now() + duration + while (performance.now() < endAt && !signal?.aborted) { + ringer.value = 1 + await Bun.sleep(50) + ringer.value = 0 + await Bun.sleep(50) + } + } finally { + ringer.value = 0 + } +} diff --git a/src/utils/log.ts b/src/utils/log.ts index 301611f..33e1c00 100644 --- a/src/utils/log.ts +++ b/src/utils/log.ts @@ -1,21 +1,21 @@ let showDebug = true let showInfo = true -let showError = true -export function setLogLevel(level: "debug" | "info" | "error" | "none") { +export function setLogLevel(level: "debug" | "info" | "error") { showDebug = level === "debug" showInfo = level === "debug" || level === "info" - showError = level !== "none" } -export const log = { - debug: (...args: any[]) => { - if (showDebug) console.debug("DEBUG: ", ...args) - }, - info: (...args: any[]) => { - if (showInfo) console.log("INFO: ", ...args) - }, - error: (...args: any[]) => { - if (showError) console.error("ERROR: ", ...args) - }, +const log = (...args: any[]) => { + if (showInfo) console.log("👁️‍🗨️ INFO: ", ...args) } + +log.debug = (...args: any[]) => { + if (showDebug) console.debug("🪲 DEBUG: ", ...args) +} + +log.error = (...args: any[]) => { + console.error("💥 ERROR: ", ...args) +} + +export default log diff --git a/src/utils/signal.ts b/src/utils/signal.ts index c10609c..e537562 100644 --- a/src/utils/signal.ts +++ b/src/utils/signal.ts @@ -13,10 +13,6 @@ * Emit a signal: * chatSignal.emit({ username: "Chad", message: "Hey everyone, how's it going?" }); * - * Forward a signal: - * const relaySignal = new Signal<{ username: string, message: string }>() - * const disconnectRelay = chatSignal.connect(relaySignal) - * // Now, when chatSignal emits, relaySignal will also emit the same data * * Disconnect a single listener: * disconnect(); // The disconnect function is returned when you connect to a signal @@ -55,3 +51,46 @@ export class Signal { this.listeners = [] } } + +/** + * How to use Emitter: + * + * Create an emitter: + * const chat = new Emitter<{ username: string, message: string }>() + * + * Listen to events: + * const off = chat.on((data) => { + * const {username, message} = data; + * console.log(`${username} said "${message}"`); + * }) + * + * Emit an event: + * chat.emit({ username: "Chad", message: "Hey everyone, how's it going?" }); + * + * Remove a specific listener: + * off(); // The off function is returned when you add a listener + * + * Remove all listeners: + * chat.removeAllListeners() + */ +export class Emitter { + private listeners: Array<(data: T) => void> = [] + + on(listener: (data: T) => void) { + this.listeners.push(listener) + + return () => { + this.listeners = this.listeners.filter((l) => l !== listener) + } + } + + emit(data: T) { + for (const listener of this.listeners) { + listener(data) + } + } + + removeAllListeners() { + this.listeners = [] + } +} diff --git a/src/utils/stdio.ts b/src/utils/stdio.ts index 0165074..a1d2888 100644 --- a/src/utils/stdio.ts +++ b/src/utils/stdio.ts @@ -1,4 +1,4 @@ -import { log } from "./log.ts" +import log from "./log.ts" export const LineSplitter = () => { let buffer = "" diff --git a/src/utils/waiting-sounds.ts b/src/utils/waiting-sounds.ts index 5d4e4e2..2d66f93 100644 --- a/src/utils/waiting-sounds.ts +++ b/src/utils/waiting-sounds.ts @@ -1,15 +1,14 @@ -import { type Player } from "../buzz/index.ts" +import Buzz from "../buzz/index.ts" import { join } from "path" -import type { Playback, StreamingPlayback } from "../buzz/utils.ts" import { random } from "./index.ts" export class WaitingSounds { - typingPlayback?: Playback - speakingPlayback?: Playback + typingPlayback?: Buzz.Playback + speakingPlayback?: Buzz.Playback - constructor(private player: Player) {} + constructor(private player: Buzz.Player) {} - async start(operatorStream: StreamingPlayback) { + async start(operatorStream: Buzz.StreamingPlayback) { if (this.typingPlayback) return // Already playing this.#startTypingSounds() @@ -35,7 +34,7 @@ export class WaitingSounds { }) } - async #startSpeakingSounds(operatorStream: StreamingPlayback) { + async #startSpeakingSounds(operatorStream: Buzz.StreamingPlayback) { const playedSounds = new Set() let dir: SoundDir | undefined return new Promise(async (resolve) => {