demo
This commit is contained in:
parent 8b8baf9151
commit 76f394b852

bun.lock | 11
@@ -146,6 +146,7 @@
      "@workshop/shared": "workspace:*",
      "hono": "catalog:",
      "luxon": "^3.7.1",
      "pixabay-api": "^1.0.4",
      "pngjs": "^7.0.0",
      "tailwind": "^4.0.0",
      "zod": "catalog:",
@@ -284,6 +285,8 @@

    "available-typed-arrays": ["available-typed-arrays@1.0.7", "", { "dependencies": { "possible-typed-array-names": "^1.0.0" } }, "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ=="],

    "axios": ["axios@0.16.2", "", { "dependencies": { "follow-redirects": "^1.2.3", "is-buffer": "^1.1.5" } }, "sha512-IMYFDrcVbUksQhsMYtWCM6KdNaDpr1NY56dpzaIgj92ecPVI29bf2sOgAf8aGTiq8UoixJD61Pj0Ahej5DPv7w=="],

    "babel-runtime": ["babel-runtime@6.26.0", "", { "dependencies": { "core-js": "^2.4.0", "regenerator-runtime": "^0.11.0" } }, "sha512-ITKNuq2wKlW1fJg9sSW52eepoYgZBggvOAHC0u/CYu/qxQ9EVzThCgR69BnSXLHjy2f7SY5zaQ4yt7H9ZVxY2g=="],

    "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="],
@@ -424,6 +427,8 @@

    "flaschenpost": ["flaschenpost@1.1.3", "", { "dependencies": { "@babel/runtime": "7.2.0", "app-root-path": "2.1.0", "babel-runtime": "6.26.0", "chalk": "2.4.1", "find-root": "1.1.0", "lodash": "4.17.11", "moment": "2.22.2", "processenv": "1.1.0", "split2": "3.0.0", "stack-trace": "0.0.10", "stringify-object": "3.3.0", "untildify": "3.0.3", "util.promisify": "1.0.0", "varname": "2.0.3" }, "bin": { "flaschenpost-uncork": "dist/bin/flaschenpost-uncork.js", "flaschenpost-normalize": "dist/bin/flaschenpost-normalize.js" } }, "sha512-1VAYPvDsVBGFJyUrOa/6clnJwZYC3qVq9nJLcypy6lvaaNbo1wOQiH8HQ+4Fw/k51pVG7JHzSf5epb8lmIW86g=="],

    "follow-redirects": ["follow-redirects@1.15.9", "", {}, "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ=="],

    "for-each": ["for-each@0.3.5", "", { "dependencies": { "is-callable": "^1.2.7" } }, "sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg=="],

    "formats": ["formats@1.0.0", "", {}, "sha512-For0Y8egwEK96JgJo4NONErPhtl7H2QzeB2NYGmzeGeJ8a1JZqPgLYOtM3oJRCYhmgsdDFd6KGRYyfe37XY4Yg=="],
@@ -498,6 +503,8 @@

    "is-boolean-object": ["is-boolean-object@1.2.2", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A=="],

    "is-buffer": ["is-buffer@1.1.6", "", {}, "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="],

    "is-callable": ["is-callable@1.2.7", "", {}, "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA=="],

    "is-data-view": ["is-data-view@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "get-intrinsic": "^1.2.6", "is-typed-array": "^1.1.13" } }, "sha512-RKtWF8pGmS87i2D6gqQu/l7EYRlVdfzemCJN/P3UOs//x1QE7mfhvzHIApBTRf7axvT6DMGwSwBXYCT0nfB9xw=="],
@@ -644,6 +651,8 @@

    "path-to-regexp": ["path-to-regexp@0.1.7", "", {}, "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ=="],

    "pixabay-api": ["pixabay-api@1.0.4", "", { "dependencies": { "@types/node": "^8.0.4", "axios": "^0.16.2" } }, "sha512-OmV0ciG+Ouosn8csp8fBta32HFAfYurKUYb4vgZphIiPneXHS4x3bNilSWiWpU7SdWAGBXnAKQWsl1s2g7E8eQ=="],

    "pkce-challenge": ["pkce-challenge@5.0.0", "", {}, "sha512-ueGLflrrnvwB3xuo/uGob5pd5FN7l0MsLf0Z87o/UQmRtwjvfylfc9MurIxRAWywCYTgrvpXBcqjV4OfCYGCIQ=="],

    "pngjs": ["pngjs@7.0.0", "", {}, "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow=="],
@@ -880,6 +889,8 @@

    "morgan/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="],

    "pixabay-api/@types/node": ["@types/node@8.10.66", "", {}, "sha512-tktOkFUA4kXx2hhhrB8bIFb5TbwzS4uOhKEmwiD+NoiL0qtP2OQ9mFldbgD4dV1djrlBYP6eBuQZiWjuHUpqFw=="],

    "router/depd": ["depd@2.0.0", "", {}, "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="],

    "router/path-to-regexp": ["path-to-regexp@8.2.0", "", {}, "sha512-TdrF7fW9Rphjq4RjrW0Kp2AW0Ahwu9sRGTkS6bvDi0SCwZlEZYmcfDbEsTz8RVk0EHIS/Vd1bv3JhG+1xZuAyQ=="],
@@ -1,15 +1,5 @@
# project-whitespace
# whitespace

To install dependencies:
# Demo

```bash
bun install
```

To run:

```bash
bun run index.ts
```

This project was created using `bun init` in bun v1.2.18. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
https://share.cleanshot.com/94tmJzCw
@@ -17,6 +17,7 @@
    "@workshop/shared": "workspace:*",
    "hono": "catalog:",
    "luxon": "^3.7.1",
    "pixabay-api": "^1.0.4",
    "pngjs": "^7.0.0",
    "tailwind": "^4.0.0",
    "zod": "catalog:"
Binary file not shown. (Before: 987 KiB, After: 842 KiB)
@@ -1,23 +0,0 @@
import { tool, RealtimeAgent } from "@openai/agents/realtime"
import { run } from "@openai/agents"

// 1. Define a tool to fetch the latest whiteboard image
const fetchWhiteboard = tool({
  name: "fetchWhiteboard",
  description: "Fetch the latest whiteboard image and return its bytes",
  parameters: undefined,
  execute: async () => {
    return await Bun.file("public/whiteboard.png").arrayBuffer()
  },
})

async function main() {
  const agent = new RealtimeAgent({
    name: "Spike",
    instructions: "When asked to analyze the whiteboard, call fetchWhiteboard",
    tools: [fetchWhiteboard],
  })

  const result = await run(agent, "Hey Spike, analyze the whiteboard.")
  console.log("Agent response:", result.finalOutput)
}
@@ -1,132 +0,0 @@
import cvReady from "@techstark/opencv-js"
import { PNG } from "pngjs"

type Element = {
  ymin: number
  xmin: number
  ymax: number
  xmax: number
  label: string
}
type StructuredResponse = { elements: Element[] }

export const detectShapes = async (
  imageBuffer: ArrayBuffer,
  minAreaPercent = 5,
  maxAreaPercent = 33
): Promise<StructuredResponse> => {
  const cv = await cvReady

  // 1. Load & decode PNG → raw RGBA buffer
  const buf = Buffer.from(imageBuffer)
  const { width, height, data } = PNG.sync.read(buf)

  // 2. Create a 4-ch Mat from RGBA pixels
  const srcRGBA = cv.matFromArray(height, width, cv.CV_8UC4, new Uint8Array(data))

  // 3. Convert → gray → blur → threshold
  const gray = new cv.Mat()
  cv.cvtColor(srcRGBA, gray, cv.COLOR_RGBA2GRAY)
  cv.GaussianBlur(gray, gray, new cv.Size(5, 5), 0)

  const thresh = new cv.Mat()
  cv.adaptiveThreshold(gray, thresh, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 11, 2)
  // Morphological opening to remove small noise
  const removeNoise = (mat: cvReady.Mat, kSize = 3) => {
    const kernel = cv.getStructuringElement(cv.MORPH_RECT, new cv.Size(kSize, kSize))
    cv.morphologyEx(mat, mat, cv.MORPH_OPEN, kernel)
    kernel.delete()
  }
  // Morphological closing to bridge gaps in contours
  const closeGaps = (mat: cvReady.Mat, kSize = 7) => {
    const kernel = cv.getStructuringElement(cv.MORPH_RECT, new cv.Size(kSize, kSize))
    cv.morphologyEx(mat, mat, cv.MORPH_CLOSE, kernel)
    kernel.delete()
  }
  removeNoise(thresh, 3)
  closeGaps(thresh, 7)

  // 4. Find contours
  const contours = new cv.MatVector()
  const hierarchy = new cv.Mat()
  cv.findContours(thresh, contours, hierarchy, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

  const norm = (v: number, max: number) => Math.round((v / max) * 1000)
  const totalImageArea = width * height
  const elements: Element[] = []

  for (let i = 0; i < contours.size(); i++) {
    const cnt = contours.get(i)
    const rect = cv.boundingRect(cnt)

    // Skip shapes whose bounding box touches the image border
    if (rect.x === 0 || rect.y === 0 || rect.x + rect.width === width || rect.y + rect.height === height) {
      // console.log(
      //   `-- skip: boundingRect touches border rect=(${rect.x},${rect.y},${rect.width},${rect.height})`
      // )
      cnt.delete()
      continue
    }

    // Calculate area based on bounding box
    const rectArea = rect.width * rect.height
    const areaPercent = (rectArea / totalImageArea) * 100

    // Basic filtering (lower bound only; upper bound filter disabled)
    if (areaPercent < minAreaPercent) {
      // cnt.delete()
      continue
    } else if (areaPercent > maxAreaPercent) {
      // cnt.delete()
      continue
    }
    // console.log(`-- upper bound filter disabled (areaPercent=${areaPercent.toFixed(2)} > maxAreaPercent=${maxAreaPercent})`)

    /*
    const margin = Math.min(width, height) * 0.05
    if (
      rect.x < margin ||
      rect.y < margin ||
      rect.x + rect.width > width - margin ||
      rect.y + rect.height > height - margin
    ) {
      // cnt.delete()
      continue
    }
    */

    // Simple shape classification
    const peri = cv.arcLength(cnt, true)
    const approx = new cv.Mat()
    cv.approxPolyDP(cnt, approx, 0.02 * peri, true)

    let label = "polygon"
    if (approx.rows === 3) label = "triangle"
    else if (approx.rows === 4) {
      const aspectRatio = rect.width / rect.height
      label = Math.abs(aspectRatio - 1) < 0.2 ? "square" : "rectangle"
    } else if (approx.rows > 6) label = "circle"

    elements.push({
      ymin: norm(rect.y, gray.rows),
      xmin: norm(rect.x, gray.cols),
      ymax: norm(rect.y + rect.height, gray.rows),
      xmax: norm(rect.x + rect.width, gray.cols),
      label,
    })

    console.log(
      `-- accepted shape #${i}: ${label} (${rect.x},${rect.y},${rect.width},${
        rect.height
      }) area=${areaPercent.toFixed(2)}%`
    )

    cnt.delete()
    approx.delete()
  }

  // 5. Cleanup
  ;[srcRGBA, gray, thresh, contours, hierarchy].forEach((m: any) => m.delete())

  return { elements }
}
@@ -1,39 +0,0 @@
{
  "elements": [
    {
      "ymin": 583,
      "xmin": 97,
      "ymax": 744,
      "xmax": 392,
      "label": "rectangle"
    },
    {
      "ymin": 471,
      "xmin": 455,
      "ymax": 680,
      "xmax": 664,
      "label": "circle"
    },
    {
      "ymin": 349,
      "xmin": 173,
      "ymax": 442,
      "xmax": 296,
      "label": "circle"
    },
    {
      "ymin": 303,
      "xmin": 432,
      "ymax": 466,
      "xmax": 589,
      "label": "circle"
    },
    {
      "ymin": 49,
      "xmin": 87,
      "ymax": 255,
      "xmax": 368,
      "label": "circle"
    }
  ]
}
@@ -16,7 +16,7 @@ const categories = [
const prompts = {
  default: `Detect all of the of the following objects: ${categories}. The box_2d should be an object with ymin, xmin, ymax, xmax properties normalized to 0-1000.`,
  simple: `Detect the 2d bounding boxes of the following objects: ${categories}.`,
  specific: `Detect 2d inscribed box for the green circle?`,
  specific: `Detect 2d bounding box for the tea kettle in the image. The box_2d should be an object with ymin, xmin, ymax, xmax properties normalized to 0-1000.`,
}

export const action = async (req: Request, params: {}) => {
@@ -1,64 +0,0 @@
import { Form, useAction } from "@workshop/nano-remix"
import { useEffect, useRef } from "hono/jsx"
import { RealtimeAgent, RealtimeSession } from "@openai/agents/realtime"
import { ensure } from "@workshop/shared/utils"

export const action = async (request: Request) => {
  const response = await fetch("https://api.openai.com/v1/realtime/sessions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "gpt-4o-realtime-preview-2025-06-03",
    }),
  })
  const { client_secret } = await response.json()

  return { secret: client_secret?.value }
}

export default function Voice() {
  const { data, loading, error } = useAction<typeof action>()
  const session = useRef<RealtimeSession | undefined>(undefined)

  useEffect(() => {
    if (!data?.secret) return
    if (session.current) return

    session.current = createSession()
    session.current.connect({ apiKey: data.secret })
  }, [data?.secret])

  return (
    <div>
      {error && <p>Error: {error}</p>}
      <p>Ephemeral Key: {loading ? "Loading..." : data?.secret}</p>
      <Form name="voiceForm">
        <button type="submit">Start Voice Session</button>
      </Form>
    </div>
  )
}

const createSession = () => {
  const agent = new RealtimeAgent({
    name: "Assistant",
    voice: "echo",
    instructions: `
      You are Spike, you are helping Corey at the whiteboard. Every question he asks will include a screenshot of the whiteboard. Sometimes his questions will be about the whiteboard, sometimes they will be about other things.

      # Voice Tone

      You have a very quiet and have a slight accent that is hard to place.

    `,
  })
  const session = new RealtimeSession(agent)
  session.on("error", (error) => {
    console.error("Session error:", error)
  })

  return session
}
@@ -1,16 +1,21 @@
import { useRef, useState, useEffect } from "hono/jsx"
import { useStreamingAI } from "../useStreamingAI"
import { useVideo } from "../useVideo"
import { VideoOverlay, type OverlayItem } from "../videoOverlay"
import { VideoOverlay } from "../videoOverlay"
import "../index.css"
import type { OverlayItem } from "../types"

export default function Voice() {
  const { audioError, transcript, isRecording: audioRecording, waitingForResponse } = useStreamingAI()
  const {
    audioError,
    transcript,
    isRecording: audioRecording,
    waitingForResponse,
    overlays,
  } = useStreamingAI()
  const videoRef = useRef<HTMLVideoElement>(null)
  const video = useVideo(videoRef)

  const [overlays, setOverlays] = useState<OverlayItem[]>([])

  let recordingStateClass = ""
  if (audioRecording) recordingStateClass = "border-red-500 border-4"
  else if (waitingForResponse) recordingStateClass = "border-yellow-500 border-4"

@@ -20,14 +25,12 @@ export default function Voice() {
      {audioError && <p class="text-red-500">Audio Error: {audioError}</p>}
      {video.error && <p class="text-red-500">Video Error: {video.error}</p>}

      {transcript && <div class="absolute top-5 left-5 right-5 bg-white/90 p-4 rounded-lg">{transcript}</div>}

      {!video.isRecording && (
        <button
          onClick={video.toggleRecording}
          class="px-4 py-2 text-8xl rounded-2xl text-white bg-green-500 hover:bg-green-600"
          class="px-4 uppercase py-2 text-8xl rounded-2xl text-white bg-green-500 hover:bg-green-600"
        >
          Start Camera
          Start THE Camera
        </button>
      )}

@@ -41,6 +44,8 @@ export default function Voice() {
        />
      </VideoOverlay>
      {video.isRecording && <div class="text-sm italic text-center">Hold Space to ask a question</div>}

      {transcript && <div class="absolute top-5 left-5 right-5 bg-white/90 p-4 rounded-lg">{transcript}</div>}
    </div>
  )
}
@@ -1,9 +1,11 @@
import { nanoRemix } from "@workshop/nano-remix"
import { OpenAI } from "openai"
import { Agent, run, type AgentInputItem } from "@openai/agents"
import { Agent, run, webSearchTool, type AgentInputItem } from "@openai/agents"
import fs from "node:fs"
import { getErrorMessage } from "@workshop/shared/errors"
import { tools } from "./tools"
import { OverlayItemSchema } from "./types"
import z from "zod"

Bun.serve({
  port: 3000,

@@ -37,8 +39,12 @@ const streamResponse = async (req: Request) => {
  const agent = new Agent({
    name: "Whiteboard Assistant",
    model: "gpt-4o",
    instructions: "You are a helpful assistant that talks about a whiteboard.",
    tools,
    instructions: `You are a helpful assistant that talks about an image.
    You will receive a transcript of a conversation and an image. Your task is to analyze the transcript and the image, then generate a response that includes text and optional overlays on the image.

    The overlays are a string description of what you would overlay on the image.
    `,
    tools: [...tools, webSearchTool()],
  })

  const imagePath = "public/whiteboard.png"

@@ -55,14 +61,41 @@ const streamResponse = async (req: Request) => {
  ]

  const result = await run(agent, input, { stream: true })
  const readableStream = result.toTextStream() as any // This DOES work, but typescript is a little confused so I cast it to any

  console.log(`🌭`, readableStream)
  return new Response(readableStream, {
  const customStream = new ReadableStream({
    async start(controller) {
      try {
        for await (const chunk of result) {
          if (chunk.type === "raw_model_stream_event" && chunk.data?.type === "output_text_delta") {
            const event = {
              type: "text_delta",
              data: chunk.data.delta,
            }
            controller.enqueue(`data: ${JSON.stringify(event)}\n\n`)
          }

          if (chunk.type === "run_item_stream_event" && chunk.item?.type === "tool_call_output_item") {
            const event = {
              type: "tool_output",
              data: chunk.item?.output,
            }
            controller.enqueue(`data: ${JSON.stringify(event)}\n\n`)
          }
        }

        controller.close()
      } catch (error) {
        controller.error(error)
      }
    },
  })

  return new Response(customStream, {
    headers: {
      "Content-Type": "text/plain",
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
      "Access-Control-Allow-Origin": "*",
    },
  })
}
@@ -1,14 +1,44 @@
import { tool } from "@openai/agents"
import z from "zod"
import type { ImageOverlay } from "./types"
import { searchImages } from "pixabay-api"
import { getGeminiResponse } from "./ai"

const pixabayApiKey = "51428355-fea6dad6a1cb56273345b23b1"
export const tools = [
  tool({
    name: "embed video",
    description: "Embed a video into the whiteboard",
    parameters: z.object({ video: z.string() }),
    execute(input, context) {
      const { video } = input
      return `Video embedded: ${video}`
    name: "create an image overlay",
    description: "Find an image to overlay on a video",
    parameters: z.object({
      whereToOverlay: z
        .string()
        .describe(
          "Where to overlay the image (e.g., 'in the red box', 'covering the hand', 'on the left side')"
        ),
      imageQuery: z.string().describe("Search term for image"),
    }),
    async execute(input, context) {
      const response = await searchImages(pixabayApiKey, input.imageQuery, { per_page: 10 })
      const hit = response.hits[0]!

      console.log(`🌭`, `Find the 2d bounding box for this "${input.whereToOverlay}"`)
      const image = await Bun.file("public/whiteboard.png").arrayBuffer()
      const boundingBox = await getGeminiResponse(
        image,
        `Find the 2d bounding box for this question "${input.whereToOverlay}"`
      )

      const element = boundingBox?.elements[0]!
      const overlay: ImageOverlay = {
        type: "image",
        src: hit.webformatURL,
        xmin: element.xmin,
        ymin: element.ymin,
        xmax: element.xmax,
        ymax: element.ymax,
      }

      return overlay
    },
  }),
]
packages/whiteboard/src/types.ts | 30 (new file)
@@ -0,0 +1,30 @@
import { z } from "zod"

export const TextOverlaySchema = z.object({
  type: z.literal("text"),
  xmin: z.number(),
  ymin: z.number(),
  xmax: z.number(),
  ymax: z.number(),
  text: z.string(),
  fontSize: z.number().optional().nullable(),
  fontFamily: z.string().optional().nullable(),
  color: z.string().optional().nullable(),
  strokeColor: z.string().optional().nullable(),
  strokeWidth: z.number().optional().nullable(),
})

export const ImageOverlaySchema = z.object({
  type: z.literal("image"),
  xmin: z.number(),
  ymin: z.number(),
  xmax: z.number(),
  ymax: z.number(),
  src: z.string(),
})

export const OverlayItemSchema = z.union([TextOverlaySchema, ImageOverlaySchema])

export type TextOverlay = z.infer<typeof TextOverlaySchema>
export type ImageOverlay = z.infer<typeof ImageOverlaySchema>
export type OverlayItem = z.infer<typeof OverlayItemSchema>
@@ -1,11 +1,13 @@
import { useEffect, useRef, useState } from "hono/jsx"
import { StreamingResponse } from "./streamingAI"
import type { OverlayItem } from "./types"

export function useStreamingAI() {
  const [audioError, setAudioError] = useState<string>("")
  const [transcript, setTranscript] = useState<string>("")
  const [isRecording, setIsRecording] = useState(false)
  const [waitingForResponse, setWaitingForResponse] = useState(false)
  const [overlays, setOverlays] = useState<OverlayItem[]>([])
  const streamingResponseRef = useRef<StreamingResponse>(null)

  const startRecording = async () => {

@@ -23,14 +25,44 @@ export function useStreamingAI() {
      const reader = await streamingResponseRef.current!.stop()
      setWaitingForResponse(false)
      const decoder = new TextDecoder()
      let buffer = ""

      const overlayItems: OverlayItem[] = []
      while (true) {
        const { done, value } = await reader.read()
        if (done) break

        const chunk = decoder.decode(value, { stream: true })
        setTranscript((prev) => prev + chunk)
        buffer += chunk

        // Parse SSE messages
        const lines = buffer.split("\n")
        buffer = lines.pop() || ""

        for (const line of lines) {
          if (line.startsWith("data: ")) {
            try {
              const eventData = JSON.parse(line.slice(6))

              if (eventData.type === "text_delta") {
                setTranscript((prev) => prev + eventData.data)
              } else if (eventData.type === "tool_output") {
                if (eventData.data.type === "image") {
                  overlayItems.push(eventData.data)
                }
              } else if (eventData.type === "done") {
                console.log("Done")
              }
            } catch (e) {
              console.error("💥 Failed to parse SSE event:", line, e)
            }
          } else if (line.trim()) {
            console.error("💥 Non-data line:", line)
          }
        }
      }

      setOverlays(overlayItems)
    } catch (error) {
      console.error("Error during streaming:", error)
      setAudioError(`Streaming failed: ${error}`)

@@ -70,5 +102,6 @@ export function useStreamingAI() {
    waitingForResponse,
    startRecording,
    endRecording,
    overlays,
  }
}
@@ -1,27 +1,5 @@
import { useRef, useEffect } from "hono/jsx"

export interface TextOverlay {
  type: "text"
  x: number
  y: number
  text: string
  fontSize?: number
  fontFamily?: string
  color?: string
  strokeColor?: string
  strokeWidth?: number
}

export interface ImageOverlay {
  type: "image"
  x: number
  y: number
  src: string
  width?: number
  height?: number
}

export type OverlayItem = TextOverlay | ImageOverlay
import type { ImageOverlay, OverlayItem, TextOverlay } from "./types"

interface VideoOverlayProps {
  overlays: OverlayItem[]

@@ -50,7 +28,17 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
    canvas.height = rect.height

    // Clear canvas
    ctx.clearRect(0, 0, canvas.width, canvas.height)
    // ctx.clearRect(0, 0, canvas.width, canvas.height)
    // ctx.fillStyle = "green"
    // const xmin = 250
    // const ymin = 250
    // const xmax = 750
    // const ymax = 750
    // const x = (xmin / 1000) * ctx.canvas.width
    // const y = (ymin / 1000) * ctx.canvas.height
    // const width = ((xmax - xmin) / 1000) * ctx.canvas.width
    // const height = ((ymax - ymin) / 1000) * ctx.canvas.height
    // ctx.fillRect(x, y, width, height)

    // Draw overlays
    for (const overlay of overlays) {

@@ -65,7 +53,6 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
  // Redraw when overlay data changes or recording state changes
  useEffect(() => {
    setTimeout(() => {
      console.log(`🌭 `, canvasRef.current?.width, canvasRef.current?.height)
      drawOverlays()
    }, 1000)
  }, [overlays, isRecording])

@@ -78,8 +65,10 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr

const drawText = (ctx: CanvasRenderingContext2D, overlay: TextOverlay) => {
  const {
    x,
    y,
    xmin,
    ymin,
    xmax,
    ymax,
    text,
    fontSize = 20,
    fontFamily = "Arial",

@@ -92,21 +81,26 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
  ctx.fillStyle = color
  ctx.strokeStyle = strokeColor
  ctx.lineWidth = strokeWidth

  const x = (xmin / 1000) * ctx.canvas.width
  const y = (ymin / 1000) * ctx.canvas.height
  const width = ((xmax - xmin) / 1000) * ctx.canvas.width
  const height = ((ymax - ymin) / 1000) * ctx.canvas.height
  ctx.strokeText(text, x, y)
  ctx.fillText(text, x, y)
}

const drawImage = (ctx: CanvasRenderingContext2D, overlay: ImageOverlay) => {
  const { x, y, src, width, height } = overlay
  const { xmin, ymin, xmax, ymax, src } = overlay

  const img = new Image()
  img.crossOrigin = "anonymous"

  img.onload = () => {
    const drawWidth = width || img.width
    const drawHeight = height || img.height
    ctx.drawImage(img, x, y, drawWidth, drawHeight)
    const x = (xmin / 1000) * ctx.canvas.width
    const y = (ymin / 1000) * ctx.canvas.height
    const width = ((xmax - xmin) / 1000) * ctx.canvas.width
    const height = ((ymax - ymin) / 1000) * ctx.canvas.height
    ctx.drawImage(img, x, y, width, height)
  }

  img.src = src