Chris Wanstrath 2025-06-18 15:56:35 -07:00
parent d16cadad4d
commit 6204f01912


@@ -4,10 +4,20 @@ import { openai, createFile } from "./src/openai"
 import { serveStatic } from "hono/bun"
 import fs from "fs"
-const CAMERA = process.env.IAGO_CAMERA || "Set IAGO_CAMERA environment variable"
-const PROMPT = "What text do you see in this image?"
-const IMAGE_PATH = "./photo.jpg"
+const PROMPT = `
+This image contains a corkboard with index cards on it - let's focus on that.
+Please return the text content of each index card followed by a single line break, and nothing else.
+Some humans write in ALL CAPS. Keep technical acronyms (DB, AI, HTML) in ALL CAPS, but convert everything else to Titlecase, Please.
+Once you have gathered all the text, please scan it to make sure it looks like a human wrote it.
+eg "It 's Steve Y'all !" => "It's Steve Y'all!"
+`
+const CAMERA = process.env.IAGO_CAMERA || "Set IAGO_CAMERA environment variable"
+const IMAGE_PATH = "/tmp/iago.jpg"
 const app = new Hono()
 // Serve static files from public directory
@ -39,27 +49,18 @@ app.get("/capture", async (c) => {
}
})
// return the last captured image
app.get("/last.jpg", async (c) => {
const image = await Bun.file(IMAGE_PATH).arrayBuffer()
return new Response(image, {
headers: { "Content-Type": "image/jpeg" },
})
})
// capture and analyze image
app.get("/analyze", async (c) => {
try {
await runImagesnap()
const fileId = await createFile(IMAGE_PATH)
const result = await openai.responses.create({
model: "gpt-4o",
input: [
{
role: "user",
content: [
{ type: "input_text", text: PROMPT },
{ type: "input_image", file_id: fileId, detail: "high" }
]
}
]
})
return c.json({ result: result.output_text })
return c.json({ result: await analyze() })
} catch (err: any) {
return c.json({ error: err.message }, 500)
}
@ -69,24 +70,8 @@ app.get("/analyze", async (c) => {
// capture and analyze image, return HTML
app.get("/analyze.html", async (c) => {
try {
await runImagesnap()
const fileId = await createFile(IMAGE_PATH)
const result = await openai.responses.create({
model: "gpt-4o",
input: [
{
role: "user",
content: [
{ type: "input_text", text: "This image contains a corkboard with index cards on it. Please return the text content of each index card followed by a line break, and nothing else." },
{ type: "input_image", file_id: fileId, detail: "high" }
]
}
]
})
return c.html(`<h2>corkboard</h2><ul>${result.output_text.split("\n").map(line => `<li>${line}</li>`).join("")}</ul>`)
const result = await analyze()
return c.html(`<h2>corkboard</h2><ul>${result.split("\n").filter(line => line.trim()).map(line => `<li>${line}</li>`).join("")}</ul>`)
} catch (err: any) {
return c.json({ error: err.message }, 500)
}
@ -148,6 +133,27 @@ app.post("/transcribe", async (c) => {
}
})
async function analyze(): Promise<string> {
await runImagesnap()
const fileId = await createFile(IMAGE_PATH)
const result = await openai.responses.create({
model: "gpt-4o",
input: [
{
role: "user",
content: [
{ type: "input_text", text: PROMPT },
{ type: "input_image", file_id: fileId, detail: "high" }
]
}
]
})
return result.output_text
}
// Capture image using Bun.spawn
async function runImagesnap(): Promise<void> {
const proc = await $`imagesnap -d ${CAMERA} ${IMAGE_PATH}`
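 
The handlers above lean on openai and createFile from ./src/openai, a helper module this diff does not show. Below is a minimal sketch of what that helper might look like, assuming the standard OpenAI Node SDK with files.create and an upload purpose of "vision"; the file name, export shapes, and details are assumptions for illustration, not the commit's actual code.

// src/openai.ts: hypothetical sketch, not part of this commit
import OpenAI from "openai"
import fs from "fs"

// Reads OPENAI_API_KEY from the environment
export const openai = new OpenAI()

// Upload a local image and return its file id so it can be referenced
// as { type: "input_image", file_id } in a Responses API call.
export async function createFile(path: string): Promise<string> {
  const file = await openai.files.create({
    file: fs.createReadStream(path),
    purpose: "vision",
  })
  return file.id
}

Under that assumption, analyze() uploads one photo per capture, passes the returned id alongside PROMPT, and returns result.output_text, the model's plain-text reply.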