Skip to content

Instantly share code, notes, and snippets.

@mikeslattery
Last active November 16, 2023 21:11
Show Gist options
  • Save mikeslattery/d4609196e2125c643f4b44d59ea0a121 to your computer and use it in GitHub Desktop.
Save mikeslattery/d4609196e2125c643f4b44d59ea0a121 to your computer and use it in GitHub Desktop.
AI Webcam to html
#!/bin/bash
# Takes a webcam photo and asks the OpenAI chat-completions (vision) API to
# convert it into a project text artifact.
# Just experimental for now.
#
# Configuration, either exported or set in an optional `.env` file that sits
# next to this script:
#   OPENAI_API_KEY  bearer token used for the API request
#   DEVICE          video device handed to fswebcam (default: /dev/video1)
#   SIZE            capture resolution handed to fswebcam (default: 512x512)
set -euo pipefail
THISDIR="$(dirname "$0")"
# The .env file is optional: the settings may already be exported, and an
# unconditional `source` would abort the script when the file is missing.
if [[ -f "$THISDIR/.env" ]]; then
  #shellcheck disable=SC1091
  source "$THISDIR/.env"
fi
# Only fall back to the defaults when the environment/.env did not choose.
DEVICE="${DEVICE:-/dev/video1}"
SIZE="${SIZE:-512x512}"
# Verifies the script's prerequisites before any work is done.
# Side effect: defines the helper functions `die` and `requires`.
# Globals:  OPENAI_API_KEY (read)
# Outputs:  a diagnostic on stderr when a prerequisite is missing
# Returns:  exits with status 1 if a required command or the API key is
#           missing; otherwise the status of the optional shellcheck run.
checks() {
  # Print all arguments to stderr and terminate with status 1.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }
  # Terminate via die unless command $1 can be resolved.
  requires() {
    command -v "$1" &>/dev/null || die "You need to install $1"
  }
  requires fswebcam
  requires base64
  requires jq
  requires jo
  requires curl
  # ${VAR:-} keeps `set -u` from aborting with "unbound variable" before the
  # friendly message can be printed.
  [[ -n "${OPENAI_API_KEY:-}" ]] || die "OPENAI_API_KEY not set"
  # Self-lint is a development aid: run it only where shellcheck is installed
  # instead of failing with "command not found".
  if command -v shellcheck &>/dev/null; then
    shellcheck -x "$0"
  fi
  # Android requirements
  # requires ffmpeg
  # ssh phone 'command -v termux-camera-photo' &>/dev/null || die "Cannot use phone"
}
# Entry point: checks prerequisites, captures a photo, sends it with the
# prompt to the chat API and prints the reply without markdown fence lines.
# Outputs:  the generated text on stdout
main() {
  local prompt photo
  checks
  #TODO: prompt hardcoded for now.
  # prompt="Generate a Graphviz dot file based on the image."
  # prompt="Generate a PlantUML ERD file based on the image. And generate SQL DDL to build it."
  prompt="Generate an html form based on the image. Style with Bulma."
  # $'\n' is a real newline; "\n" inside double quotes would stay a literal
  # backslash-n and reach the model as the two characters '\' and 'n'.
  prompt+=$'\n'"Only generate the raw text. Do not produce commentary or surrounding markdown."
  photo="$(take_photo)"
  # Drop every line containing a markdown code fence from the reply.
  generate_data "$prompt" "$photo" | sendchat | sed '/```/d'
}
#usage: tmpfile --suffix=.jpg
# Creates a temporary file and prints its path (no trailing newline).
# Arguments: passed through unchanged to mktemp
# Outputs:   the path of the created file on stdout
# Returns:   mktemp's status when the file cannot be created
# Note: when called as "$(tmpfile ...)" the function runs in a subshell, so an
# EXIT trap set here would fire as soon as the substitution ends. In that case
# the caller owns the file and must remove it.
tmpfile() {
  local path
  # Really create the file: --dry-run only returns a name, which is racy.
  path="$(mktemp "$@")" || return
  # Register automatic cleanup only when running in the main shell, where the
  # EXIT trap actually outlives this call.
  if [[ "${BASHPID:-$$}" == "$$" ]]; then
    #shellcheck disable=SC2064
    trap "rm -f -- $(printf '%q' "$path")" EXIT
  fi
  printf '%s' "$path"
}
# Captures one frame with fswebcam and prints it as a JPEG data URL.
# Globals:  SIZE, DEVICE (read)
# Outputs:  "data:image/jpeg;base64,<payload>" on stdout, no trailing newline
# Returns:  non-zero if creating the temp file, capturing or encoding fails
take_photo() {
  local file base64_image rc=0
  file="$(tmpfile --suffix=.jpg)" || return
  fswebcam -q -r "$SIZE" --jpeg 20 --no-banner -d "$DEVICE" "$file" || rc=$?
  if (( rc == 0 )); then
    # Assignment is kept separate from `local` so a base64 failure is not masked.
    base64_image="$(base64 -w 0 "$file")" || rc=$?
  fi
  # Remove the temp file here: cleanup registered inside "$(tmpfile ...)" runs
  # in a subshell and cannot be relied on.
  rm -f -- "$file"
  (( rc == 0 )) || return "$rc"
  printf 'data:image/jpeg;base64,%s' "$base64_image"
}
#shellcheck disable=SC2034,SC2276
# Builds the JSON request body for the chat completion call using jo.
# Arguments: $1  - prompt text
#            $2  - image URL (a data: URL in this script)
#            $@  - any further key=value words are handed to the outer jo call
# Outputs:   the request JSON on stdout
# Returns:   non-zero as soon as any jo invocation fails
generate_data() {
  # Locals keep this function from clobbering the caller's prompt/url.
  local prompt url text_part image_part message
  prompt="$1"; shift
  url="$1"; shift
  # Build the pieces one by one so a failing jo call is not silently ignored.
  text_part="$(jo type=text text="$prompt")" || return
  image_part="$(jo type=image_url image_url="$(jo url="$url")")" || return
  message="$(jo \
    'role'="user" \
    'content[]'="$text_part" \
    'content[]'="$image_part")" || return
  jo \
    model="gpt-4-vision-preview" \
    'messages[]'="$message" \
    max_tokens=3000 \
    "$@"
}
# Posts the request body read from stdin to the chat completions endpoint and
# prints the content of every returned choice, separated by "----" lines.
# Globals:  OPENAI_API_KEY (read)
# Outputs:  the extracted reply text on stdout
# Returns:  non-zero when the key is missing or curl/jq fail
sendchat() {
  # stdin/stdout will be request/response body.
  : "${OPENAI_API_KEY:?OPENAI_API_KEY not set}"
  # The Authorization header is supplied through a file descriptor
  # (-H @file) so the secret never appears in curl's argument list.
  curl -sSf -X POST \
    "https://api.openai.com/v1/chat/completions" \
    -H @<(printf 'Authorization: Bearer %s\n' "$OPENAI_API_KEY") \
    -H "Content-Type: application/json" \
    -H "Accept: application/json" \
    --max-time 180 --retry 5 --retry-delay 3 \
    --json @- |
    jq -r '[.choices[].message.content] | join("\n\n----\n\n")'
}
# Requires android, Termux, Termux::API, sshd, and "phone" host alias
# Takes a photo on the phone over ssh, copies it here, scales it with ffmpeg
# and prints it as a JPEG data URL.
# Outputs:  "data:image/jpeg;base64,<payload>" on stdout, no trailing newline
# Returns:  non-zero if any remote or local step fails
take_android_photo() {
  local workdir image base64_image rc=0
  # All local intermediates live in a private directory, so nothing is left
  # behind in (or overwritten in) the current directory.
  workdir="$(mktemp -d)" || return
  #shellcheck disable=SC2029
  {
    image="$(ssh phone 'echo $TMPDIR/ai.jpg')" &&
      ssh phone "termux-camera-photo -c 0 '$image'" &&
      scp "phone:$image" "$workdir/ai.jpg" &&
      ffmpeg -i "$workdir/ai.jpg" -q:v 5 -vf scale=512:512 "$workdir/ai2.jpg" &&
      base64_image="$(base64 -w 0 "$workdir/ai2.jpg")"
  } || rc=$?
  # Clean up on success and on failure alike.
  rm -rf -- "$workdir"
  (( rc == 0 )) || return "$rc"
  printf 'data:image/jpeg;base64,%s' "$base64_image"
}
# Script entry point: run main, forwarding all command-line arguments.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment