Corey Johnson 2025-07-21 16:53:47 -07:00
parent 8b8baf9151
commit 76f394b852
15 changed files with 195 additions and 326 deletions

View File

@ -146,6 +146,7 @@
"@workshop/shared": "workspace:*",
"hono": "catalog:",
"luxon": "^3.7.1",
"pixabay-api": "^1.0.4",
"pngjs": "^7.0.0",
"tailwind": "^4.0.0",
"zod": "catalog:",
@ -284,6 +285,8 @@
"available-typed-arrays": ["available-typed-arrays@1.0.7", "", { "dependencies": { "possible-typed-array-names": "^1.0.0" } }, "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ=="],
"axios": ["axios@0.16.2", "", { "dependencies": { "follow-redirects": "^1.2.3", "is-buffer": "^1.1.5" } }, "sha512-IMYFDrcVbUksQhsMYtWCM6KdNaDpr1NY56dpzaIgj92ecPVI29bf2sOgAf8aGTiq8UoixJD61Pj0Ahej5DPv7w=="],
"babel-runtime": ["babel-runtime@6.26.0", "", { "dependencies": { "core-js": "^2.4.0", "regenerator-runtime": "^0.11.0" } }, "sha512-ITKNuq2wKlW1fJg9sSW52eepoYgZBggvOAHC0u/CYu/qxQ9EVzThCgR69BnSXLHjy2f7SY5zaQ4yt7H9ZVxY2g=="],
"base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="],
@ -424,6 +427,8 @@
"flaschenpost": ["flaschenpost@1.1.3", "", { "dependencies": { "@babel/runtime": "7.2.0", "app-root-path": "2.1.0", "babel-runtime": "6.26.0", "chalk": "2.4.1", "find-root": "1.1.0", "lodash": "4.17.11", "moment": "2.22.2", "processenv": "1.1.0", "split2": "3.0.0", "stack-trace": "0.0.10", "stringify-object": "3.3.0", "untildify": "3.0.3", "util.promisify": "1.0.0", "varname": "2.0.3" }, "bin": { "flaschenpost-uncork": "dist/bin/flaschenpost-uncork.js", "flaschenpost-normalize": "dist/bin/flaschenpost-normalize.js" } }, "sha512-1VAYPvDsVBGFJyUrOa/6clnJwZYC3qVq9nJLcypy6lvaaNbo1wOQiH8HQ+4Fw/k51pVG7JHzSf5epb8lmIW86g=="],
"follow-redirects": ["follow-redirects@1.15.9", "", {}, "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ=="],
"for-each": ["for-each@0.3.5", "", { "dependencies": { "is-callable": "^1.2.7" } }, "sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg=="],
"formats": ["formats@1.0.0", "", {}, "sha512-For0Y8egwEK96JgJo4NONErPhtl7H2QzeB2NYGmzeGeJ8a1JZqPgLYOtM3oJRCYhmgsdDFd6KGRYyfe37XY4Yg=="],
@ -498,6 +503,8 @@
"is-boolean-object": ["is-boolean-object@1.2.2", "", { "dependencies": { "call-bound": "^1.0.3", "has-tostringtag": "^1.0.2" } }, "sha512-wa56o2/ElJMYqjCjGkXri7it5FbebW5usLw/nPmCMs5DeZ7eziSYZhSmPRn0txqeW4LnAmQQU7FgqLpsEFKM4A=="],
"is-buffer": ["is-buffer@1.1.6", "", {}, "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="],
"is-callable": ["is-callable@1.2.7", "", {}, "sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA=="],
"is-data-view": ["is-data-view@1.0.2", "", { "dependencies": { "call-bound": "^1.0.2", "get-intrinsic": "^1.2.6", "is-typed-array": "^1.1.13" } }, "sha512-RKtWF8pGmS87i2D6gqQu/l7EYRlVdfzemCJN/P3UOs//x1QE7mfhvzHIApBTRf7axvT6DMGwSwBXYCT0nfB9xw=="],
@ -644,6 +651,8 @@
"path-to-regexp": ["path-to-regexp@0.1.7", "", {}, "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ=="],
"pixabay-api": ["pixabay-api@1.0.4", "", { "dependencies": { "@types/node": "^8.0.4", "axios": "^0.16.2" } }, "sha512-OmV0ciG+Ouosn8csp8fBta32HFAfYurKUYb4vgZphIiPneXHS4x3bNilSWiWpU7SdWAGBXnAKQWsl1s2g7E8eQ=="],
"pkce-challenge": ["pkce-challenge@5.0.0", "", {}, "sha512-ueGLflrrnvwB3xuo/uGob5pd5FN7l0MsLf0Z87o/UQmRtwjvfylfc9MurIxRAWywCYTgrvpXBcqjV4OfCYGCIQ=="],
"pngjs": ["pngjs@7.0.0", "", {}, "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow=="],
@ -880,6 +889,8 @@
"morgan/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="],
"pixabay-api/@types/node": ["@types/node@8.10.66", "", {}, "sha512-tktOkFUA4kXx2hhhrB8bIFb5TbwzS4uOhKEmwiD+NoiL0qtP2OQ9mFldbgD4dV1djrlBYP6eBuQZiWjuHUpqFw=="],
"router/depd": ["depd@2.0.0", "", {}, "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="],
"router/path-to-regexp": ["path-to-regexp@8.2.0", "", {}, "sha512-TdrF7fW9Rphjq4RjrW0Kp2AW0Ahwu9sRGTkS6bvDi0SCwZlEZYmcfDbEsTz8RVk0EHIS/Vd1bv3JhG+1xZuAyQ=="],

View File

@ -1,15 +1,5 @@
# project-whitespace
# whitespace
To install dependencies:
# Demo
```bash
bun install
```
To run:
```bash
bun run index.ts
```
This project was created using `bun init` in bun v1.2.18. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime.
https://share.cleanshot.com/94tmJzCw

View File

@ -17,6 +17,7 @@
"@workshop/shared": "workspace:*",
"hono": "catalog:",
"luxon": "^3.7.1",
"pixabay-api": "^1.0.4",
"pngjs": "^7.0.0",
"tailwind": "^4.0.0",
"zod": "catalog:"

Binary file not shown.

Before: 987 KiB
After: 842 KiB

View File

@ -1,23 +0,0 @@
import { tool, RealtimeAgent } from "@openai/agents/realtime"
import { run } from "@openai/agents"
// 1. Define a tool to fetch the latest whiteboard image
const fetchWhiteboard = tool({
name: "fetchWhiteboard",
description: "Fetch the latest whiteboard image and return its bytes",
parameters: undefined,
execute: async () => {
return await Bun.file("public/whiteboard.png").arrayBuffer()
},
})
async function main() {
const agent = new RealtimeAgent({
name: "Spike",
instructions: "When asked to analyze the whiteboard, call fetchWhiteboard",
tools: [fetchWhiteboard],
})
const result = await run(agent, "Hey Spike, analyze the whiteboard.")
console.log("Agent response:", result.finalOutput)
}

View File

@ -1,132 +0,0 @@
import cvReady from "@techstark/opencv-js"
import { PNG } from "pngjs"
type Element = {
ymin: number
xmin: number
ymax: number
xmax: number
label: string
}
type StructuredResponse = { elements: Element[] }
export const detectShapes = async (
imageBuffer: ArrayBuffer,
minAreaPercent = 5,
maxAreaPercent = 33
): Promise<StructuredResponse> => {
const cv = await cvReady
// 1. Load & decode PNG → raw RGBA buffer
const buf = Buffer.from(imageBuffer)
const { width, height, data } = PNG.sync.read(buf)
// 2. Create a 4-ch Mat from RGBA pixels
const srcRGBA = cv.matFromArray(height, width, cv.CV_8UC4, new Uint8Array(data))
// 3. Convert → gray → blur → threshold
const gray = new cv.Mat()
cv.cvtColor(srcRGBA, gray, cv.COLOR_RGBA2GRAY)
cv.GaussianBlur(gray, gray, new cv.Size(5, 5), 0)
const thresh = new cv.Mat()
cv.adaptiveThreshold(gray, thresh, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 11, 2)
// Morphological opening to remove small noise
const removeNoise = (mat: cvReady.Mat, kSize = 3) => {
const kernel = cv.getStructuringElement(cv.MORPH_RECT, new cv.Size(kSize, kSize))
cv.morphologyEx(mat, mat, cv.MORPH_OPEN, kernel)
kernel.delete()
}
// Morphological closing to bridge gaps in contours
const closeGaps = (mat: cvReady.Mat, kSize = 7) => {
const kernel = cv.getStructuringElement(cv.MORPH_RECT, new cv.Size(kSize, kSize))
cv.morphologyEx(mat, mat, cv.MORPH_CLOSE, kernel)
kernel.delete()
}
removeNoise(thresh, 3)
closeGaps(thresh, 7)
// 4. Find contours
const contours = new cv.MatVector()
const hierarchy = new cv.Mat()
cv.findContours(thresh, contours, hierarchy, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
const norm = (v: number, max: number) => Math.round((v / max) * 1000)
const totalImageArea = width * height
const elements: Element[] = []
for (let i = 0; i < contours.size(); i++) {
const cnt = contours.get(i)
const rect = cv.boundingRect(cnt)
// Skip shapes whose bounding box touches the image border
if (rect.x === 0 || rect.y === 0 || rect.x + rect.width === width || rect.y + rect.height === height) {
// console.log(
// `-- skip: boundingRect touches border rect=(${rect.x},${rect.y},${rect.width},${rect.height})`
// )
cnt.delete()
continue
}
// Calculate area based on bounding box
const rectArea = rect.width * rect.height
const areaPercent = (rectArea / totalImageArea) * 100
// Basic filtering (lower bound only; upper bound filter disabled)
if (areaPercent < minAreaPercent) {
// cnt.delete()
continue
} else if (areaPercent > maxAreaPercent) {
// cnt.delete()
continue
}
// console.log(`-- upper bound filter disabled (areaPercent=${areaPercent.toFixed(2)} > maxAreaPercent=${maxAreaPercent})`)
/*
const margin = Math.min(width, height) * 0.05
if (
rect.x < margin ||
rect.y < margin ||
rect.x + rect.width > width - margin ||
rect.y + rect.height > height - margin
) {
// cnt.delete()
continue
}
*/
// Simple shape classification
const peri = cv.arcLength(cnt, true)
const approx = new cv.Mat()
cv.approxPolyDP(cnt, approx, 0.02 * peri, true)
let label = "polygon"
if (approx.rows === 3) label = "triangle"
else if (approx.rows === 4) {
const aspectRatio = rect.width / rect.height
label = Math.abs(aspectRatio - 1) < 0.2 ? "square" : "rectangle"
} else if (approx.rows > 6) label = "circle"
elements.push({
ymin: norm(rect.y, gray.rows),
xmin: norm(rect.x, gray.cols),
ymax: norm(rect.y + rect.height, gray.rows),
xmax: norm(rect.x + rect.width, gray.cols),
label,
})
console.log(
`-- accepted shape #${i}: ${label} (${rect.x},${rect.y},${rect.width},${
rect.height
}) area=${areaPercent.toFixed(2)}%`
)
cnt.delete()
approx.delete()
}
// 5. Cleanup
;[srcRGBA, gray, thresh, contours, hierarchy].forEach((m: any) => m.delete())
return { elements }
}

View File

@ -1,39 +0,0 @@
{
"elements": [
{
"ymin": 583,
"xmin": 97,
"ymax": 744,
"xmax": 392,
"label": "rectangle"
},
{
"ymin": 471,
"xmin": 455,
"ymax": 680,
"xmax": 664,
"label": "circle"
},
{
"ymin": 349,
"xmin": 173,
"ymax": 442,
"xmax": 296,
"label": "circle"
},
{
"ymin": 303,
"xmin": 432,
"ymax": 466,
"xmax": 589,
"label": "circle"
},
{
"ymin": 49,
"xmin": 87,
"ymax": 255,
"xmax": 368,
"label": "circle"
}
]
}

View File

@ -16,7 +16,7 @@ const categories = [
const prompts = {
default: `Detect all of the following objects: ${categories}. The box_2d should be an object with ymin, xmin, ymax, xmax properties normalized to 0-1000.`,
simple: `Detect the 2d bounding boxes of the following objects: ${categories}.`,
specific: `Detect 2d inscribed box for the green circle?`,
specific: `Detect 2d bounding box for the tea kettle in the image. The box_2d should be an object with ymin, xmin, ymax, xmax properties normalized to 0-1000.`,
}
export const action = async (req: Request, params: {}) => {

View File

@ -1,64 +0,0 @@
import { Form, useAction } from "@workshop/nano-remix"
import { useEffect, useRef } from "hono/jsx"
import { RealtimeAgent, RealtimeSession } from "@openai/agents/realtime"
import { ensure } from "@workshop/shared/utils"
export const action = async (request: Request) => {
const response = await fetch("https://api.openai.com/v1/realtime/sessions", {
method: "POST",
headers: {
Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model: "gpt-4o-realtime-preview-2025-06-03",
}),
})
const { client_secret } = await response.json()
return { secret: client_secret?.value }
}
export default function Voice() {
const { data, loading, error } = useAction<typeof action>()
const session = useRef<RealtimeSession | undefined>(undefined)
useEffect(() => {
if (!data?.secret) return
if (session.current) return
session.current = createSession()
session.current.connect({ apiKey: data.secret })
}, [data?.secret])
return (
<div>
{error && <p>Error: {error}</p>}
<p>Ephemeral Key: {loading ? "Loading..." : data?.secret}</p>
<Form name="voiceForm">
<button type="submit">Start Voice Session</button>
</Form>
</div>
)
}
const createSession = () => {
const agent = new RealtimeAgent({
name: "Assistant",
voice: "echo",
instructions: `
You are Spike, you are helping Corey at the whiteboard. Every question he asks will include a screenshot of the whiteboard. Sometimes his questions will be about the whiteboard, sometimes they will be about other things.
# Voice Tone
You have a very quiet voice and a slight accent that is hard to place.
`,
})
const session = new RealtimeSession(agent)
session.on("error", (error) => {
console.error("Session error:", error)
})
return session
}

View File

@ -1,16 +1,21 @@
import { useRef, useState, useEffect } from "hono/jsx"
import { useStreamingAI } from "../useStreamingAI"
import { useVideo } from "../useVideo"
import { VideoOverlay, type OverlayItem } from "../videoOverlay"
import { VideoOverlay } from "../videoOverlay"
import "../index.css"
import type { OverlayItem } from "../types"
export default function Voice() {
const { audioError, transcript, isRecording: audioRecording, waitingForResponse } = useStreamingAI()
const {
audioError,
transcript,
isRecording: audioRecording,
waitingForResponse,
overlays,
} = useStreamingAI()
const videoRef = useRef<HTMLVideoElement>(null)
const video = useVideo(videoRef)
const [overlays, setOverlays] = useState<OverlayItem[]>([])
let recordingStateClass = ""
if (audioRecording) recordingStateClass = "border-red-500 border-4"
else if (waitingForResponse) recordingStateClass = "border-yellow-500 border-4"
@ -20,14 +25,12 @@ export default function Voice() {
{audioError && <p class="text-red-500">Audio Error: {audioError}</p>}
{video.error && <p class="text-red-500">Video Error: {video.error}</p>}
{transcript && <div class="absolute top-5 left-5 right-5 bg-white/90 p-4 rounded-lg">{transcript}</div>}
{!video.isRecording && (
<button
onClick={video.toggleRecording}
class="px-4 py-2 text-8xl rounded-2xl text-white bg-green-500 hover:bg-green-600"
class="px-4 uppercase py-2 text-8xl rounded-2xl text-white bg-green-500 hover:bg-green-600"
>
Start Camera
Start THE Camera
</button>
)}
@ -41,6 +44,8 @@ export default function Voice() {
/>
</VideoOverlay>
{video.isRecording && <div class="text-sm italic text-center">Hold Space to ask a question</div>}
{transcript && <div class="absolute top-5 left-5 right-5 bg-white/90 p-4 rounded-lg">{transcript}</div>}
</div>
)
}

View File

@ -1,9 +1,11 @@
import { nanoRemix } from "@workshop/nano-remix"
import { OpenAI } from "openai"
import { Agent, run, type AgentInputItem } from "@openai/agents"
import { Agent, run, webSearchTool, type AgentInputItem } from "@openai/agents"
import fs from "node:fs"
import { getErrorMessage } from "@workshop/shared/errors"
import { tools } from "./tools"
import { OverlayItemSchema } from "./types"
import z from "zod"
Bun.serve({
port: 3000,
@ -37,8 +39,12 @@ const streamResponse = async (req: Request) => {
const agent = new Agent({
name: "Whiteboard Assistant",
model: "gpt-4o",
instructions: "You are a helpful assistant that talks about a whiteboard.",
tools,
instructions: `You are a helpful assistant that talks about an image.
You will receive a transcript of a conversation and an image. Your task is to analyze the transcript and the image, then generate a response that includes text and optional overlays on the image.
The overlays are a string description of what you would overlay on the image.
`,
tools: [...tools, webSearchTool()],
})
const imagePath = "public/whiteboard.png"
@ -55,14 +61,41 @@ const streamResponse = async (req: Request) => {
]
const result = await run(agent, input, { stream: true })
const readableStream = result.toTextStream() as any // This DOES work, but typescript is a little confused so I cast it to any
console.log(`🌭`, readableStream)
return new Response(readableStream, {
const customStream = new ReadableStream({
async start(controller) {
try {
for await (const chunk of result) {
if (chunk.type === "raw_model_stream_event" && chunk.data?.type === "output_text_delta") {
const event = {
type: "text_delta",
data: chunk.data.delta,
}
controller.enqueue(`data: ${JSON.stringify(event)}\n\n`)
}
if (chunk.type === "run_item_stream_event" && chunk.item?.type === "tool_call_output_item") {
const event = {
type: "tool_output",
data: chunk.item?.output,
}
controller.enqueue(`data: ${JSON.stringify(event)}\n\n`)
}
}
controller.close()
} catch (error) {
controller.error(error)
}
},
})
return new Response(customStream, {
headers: {
"Content-Type": "text/plain",
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
"Access-Control-Allow-Origin": "*",
},
})
}
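For reference (a sketch inferred from the handler above, not part of the commit), the two SSE payload shapes written to `customStream` are:
```ts
// Shapes of the SSE payloads emitted by the handler above (names taken from the code)
type TextDeltaEvent = { type: "text_delta"; data: string } // incremental model text
type ToolOutputEvent = { type: "tool_output"; data: unknown } // an OverlayItem when the image-overlay tool runs

// Each frame on the wire is a single SSE data line followed by a blank line:
//   data: {"type":"text_delta","data":"..."}
```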

View File

@ -1,14 +1,44 @@
import { tool } from "@openai/agents"
import z from "zod"
import type { ImageOverlay } from "./types"
import { searchImages } from "pixabay-api"
import { getGeminiResponse } from "./ai"
const pixabayApiKey = "51428355-fea6dad6a1cb56273345b23b1"
export const tools = [
tool({
name: "embed video",
description: "Embed a video into the whiteboard",
parameters: z.object({ video: z.string() }),
execute(input, context) {
const { video } = input
return `Video embedded: ${video}`
name: "create an image overlay",
description: "Find an image to overlay on a video",
parameters: z.object({
whereToOverlay: z
.string()
.describe(
"Where to overlay the image (e.g., 'in the red box', 'covering the hand', 'on the left side')"
),
imageQuery: z.string().describe("Search term for image"),
}),
async execute(input, context) {
const response = await searchImages(pixabayApiKey, input.imageQuery, { per_page: 10 })
const hit = response.hits[0]!
console.log(`🌭`, `Find the 2d bounding box for this "${input.whereToOverlay}"`)
const image = await Bun.file("public/whiteboard.png").arrayBuffer()
const boundingBox = await getGeminiResponse(
image,
`Find the 2d bounding box for this question "${input.whereToOverlay}"`
)
const element = boundingBox?.elements[0]!
const overlay: ImageOverlay = {
type: "image",
src: hit.webformatURL,
xmin: element.xmin,
ymin: element.ymin,
xmax: element.xmax,
ymax: element.ymax,
}
return overlay
},
}),
]
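For context (hypothetical values, not part of the diff), a call to this tool from the model might carry arguments like:
```ts
// Hypothetical arguments the agent could pass to the "create an image overlay" tool
const exampleArgs = {
  whereToOverlay: "in the red box", // free-form placement, resolved to a bounding box via getGeminiResponse
  imageQuery: "tea kettle",         // Pixabay search term
}
```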

View File

@ -0,0 +1,30 @@
import { z } from "zod"
export const TextOverlaySchema = z.object({
type: z.literal("text"),
xmin: z.number(),
ymin: z.number(),
xmax: z.number(),
ymax: z.number(),
text: z.string(),
fontSize: z.number().optional().nullable(),
fontFamily: z.string().optional().nullable(),
color: z.string().optional().nullable(),
strokeColor: z.string().optional().nullable(),
strokeWidth: z.number().optional().nullable(),
})
export const ImageOverlaySchema = z.object({
type: z.literal("image"),
xmin: z.number(),
ymin: z.number(),
xmax: z.number(),
ymax: z.number(),
src: z.string(),
})
export const OverlayItemSchema = z.union([TextOverlaySchema, ImageOverlaySchema])
export type TextOverlay = z.infer<typeof TextOverlaySchema>
export type ImageOverlay = z.infer<typeof ImageOverlaySchema>
export type OverlayItem = z.infer<typeof OverlayItemSchema>
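As a quick usage sketch (not part of the commit), a tool output can be validated against these schemas before it is drawn:
```ts
import { OverlayItemSchema } from "./types"

// Hypothetical payload in the shape produced by the image-overlay tool
const payload = {
  type: "image",
  src: "https://cdn.pixabay.com/photo/example.jpg",
  xmin: 120,
  ymin: 80,
  xmax: 560,
  ymax: 430,
}

// safeParse returns a success/error result instead of throwing
const parsed = OverlayItemSchema.safeParse(payload)
if (parsed.success) {
  console.log("valid overlay:", parsed.data.type)
} else {
  console.error("invalid overlay:", parsed.error.issues)
}
```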

View File

@ -1,11 +1,13 @@
import { useEffect, useRef, useState } from "hono/jsx"
import { StreamingResponse } from "./streamingAI"
import type { OverlayItem } from "./types"
export function useStreamingAI() {
const [audioError, setAudioError] = useState<string>("")
const [transcript, setTranscript] = useState<string>("")
const [isRecording, setIsRecording] = useState(false)
const [waitingForResponse, setWaitingForResponse] = useState(false)
const [overlays, setOverlays] = useState<OverlayItem[]>([])
const streamingResponseRef = useRef<StreamingResponse>(null)
const startRecording = async () => {
@ -23,14 +25,44 @@ export function useStreamingAI() {
const reader = await streamingResponseRef.current!.stop()
setWaitingForResponse(false)
const decoder = new TextDecoder()
let buffer = ""
const overlayItems: OverlayItem[] = []
while (true) {
const { done, value } = await reader.read()
if (done) break
const chunk = decoder.decode(value, { stream: true })
setTranscript((prev) => prev + chunk)
buffer += chunk
// Parse SSE messages
const lines = buffer.split("\n")
buffer = lines.pop() || ""
for (const line of lines) {
if (line.startsWith("data: ")) {
try {
const eventData = JSON.parse(line.slice(6))
if (eventData.type === "text_delta") {
setTranscript((prev) => prev + eventData.data)
} else if (eventData.type === "tool_output") {
if (eventData.data.type === "image") {
overlayItems.push(eventData.data)
}
} else if (eventData.type === "done") {
console.log("Done")
}
} catch (e) {
console.error("💥 Failed to parse SSE event:", line, e)
}
} else if (line.trim()) {
console.error("💥 Non-data line:", line)
}
}
}
setOverlays(overlayItems)
} catch (error) {
console.error("Error during streaming:", error)
setAudioError(`Streaming failed: ${error}`)
@ -70,5 +102,6 @@ export function useStreamingAI() {
waitingForResponse,
startRecording,
endRecording,
overlays,
}
}

View File

@ -1,27 +1,5 @@
import { useRef, useEffect } from "hono/jsx"
export interface TextOverlay {
type: "text"
x: number
y: number
text: string
fontSize?: number
fontFamily?: string
color?: string
strokeColor?: string
strokeWidth?: number
}
export interface ImageOverlay {
type: "image"
x: number
y: number
src: string
width?: number
height?: number
}
export type OverlayItem = TextOverlay | ImageOverlay
import type { ImageOverlay, OverlayItem, TextOverlay } from "./types"
interface VideoOverlayProps {
overlays: OverlayItem[]
@ -50,7 +28,17 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
canvas.height = rect.height
// Clear canvas
ctx.clearRect(0, 0, canvas.width, canvas.height)
// ctx.clearRect(0, 0, canvas.width, canvas.height)
// ctx.fillStyle = "green"
// const xmin = 250
// const ymin = 250
// const xmax = 750
// const ymax = 750
// const x = (xmin / 1000) * ctx.canvas.width
// const y = (ymin / 1000) * ctx.canvas.height
// const width = ((xmax - xmin) / 1000) * ctx.canvas.width
// const height = ((ymax - ymin) / 1000) * ctx.canvas.height
// ctx.fillRect(x, y, width, height)
// Draw overlays
for (const overlay of overlays) {
@ -65,7 +53,6 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
// Redraw when overlay data changes or recording state changes
useEffect(() => {
setTimeout(() => {
console.log(`🌭 `, canvasRef.current?.width, canvasRef.current?.height)
drawOverlays()
}, 1000)
}, [overlays, isRecording])
@ -78,8 +65,10 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
const drawText = (ctx: CanvasRenderingContext2D, overlay: TextOverlay) => {
const {
x,
y,
xmin,
ymin,
xmax,
ymax,
text,
fontSize = 20,
fontFamily = "Arial",
@ -92,21 +81,26 @@ export function VideoOverlay({ overlays, children, isRecording }: VideoOverlayPr
ctx.fillStyle = color
ctx.strokeStyle = strokeColor
ctx.lineWidth = strokeWidth
const x = (xmin / 1000) * ctx.canvas.width
const y = (ymin / 1000) * ctx.canvas.height
const width = ((xmax - xmin) / 1000) * ctx.canvas.width
const height = ((ymax - ymin) / 1000) * ctx.canvas.height
ctx.strokeText(text, x, y)
ctx.fillText(text, x, y)
}
const drawImage = (ctx: CanvasRenderingContext2D, overlay: ImageOverlay) => {
const { x, y, src, width, height } = overlay
const { xmin, ymin, xmax, ymax, src } = overlay
const img = new Image()
img.crossOrigin = "anonymous"
img.onload = () => {
const drawWidth = width || img.width
const drawHeight = height || img.height
ctx.drawImage(img, x, y, drawWidth, drawHeight)
const x = (xmin / 1000) * ctx.canvas.width
const y = (ymin / 1000) * ctx.canvas.height
const width = ((xmax - xmin) / 1000) * ctx.canvas.width
const height = ((ymax - ymin) / 1000) * ctx.canvas.height
ctx.drawImage(img, x, y, width, height)
}
img.src = src