Last active
November 16, 2023 21:11
-
-
Save mikeslattery/d4609196e2125c643f4b44d59ea0a121 to your computer and use it in GitHub Desktop.
AI Webcam to html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Takes a photo and converts to a project text artifact.
# Just experimental for now.
#
# Settings come from the environment or from an optional ".env" file that
# sits next to this script:
#   OPENAI_API_KEY  key used for the OpenAI request (required)
#   DEVICE          video device handed to fswebcam (default: /dev/video1)
#   SIZE            capture resolution handed to fswebcam (default: 512x512)

set -euo pipefail

THISDIR="$(dirname "$0")"

# The .env file is optional: everything it provides may just as well be
# exported by the caller, so a missing file must not abort the script.
if [[ -f "$THISDIR/.env" ]]; then
  #shellcheck disable=SC1091
  source "$THISDIR/.env"
fi

# Defaults only apply when neither the environment nor .env set a value.
DEVICE="${DEVICE:-/dev/video1}"
SIZE="${SIZE:-512x512}"
# Verifies that the external tools and settings the script relies on are
# present, and aborts with a message on stderr (exit status 1) otherwise.
# As a side effect it defines the helper functions die() and requires().
checks() {
  # Prints its arguments to stderr and terminates the script with status 1.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  # Aborts unless the command named by $1 can be resolved.
  requires() {
    command -v "$1" &>/dev/null || die "You need to install $1"
  }

  requires fswebcam
  requires base64
  requires jq
  requires jo
  requires curl

  # ${VAR:-} keeps "set -u" from aborting with a bare "unbound variable"
  # error before the friendlier message below can be printed.
  [[ -n "${OPENAI_API_KEY:-}" ]] || die "OPENAI_API_KEY not set"

  # Self-lint on every run, but only where shellcheck is installed; it is a
  # development aid, not something taking a photo depends on.
  if command -v shellcheck &>/dev/null; then
    shellcheck -x "$0"
  fi

  # Android requirements
  # requires ffmpeg
  # ssh phone 'command -v termux-camera-photo' &>/dev/null || die "Cannot use phone"
}
# Entry point: runs the dependency checks, takes a photo, sends it together
# with the prompt to the chat endpoint and prints the answer on stdout with
# every line containing a ``` fence removed.
main() {
  local prompt photo

  checks

  #TODO: prompt hardcoded for now.
  # prompt="Generate a Graphviz dot file based on the image."
  # prompt="Generate a PlantUML ERD file based on the image. And generate SQL DDL to build it."
  prompt="Generate an html form based on the image. Style with Bulma."
  # $'\n' is a real newline; "\n" inside double quotes would stay a literal
  # backslash followed by "n" and end up verbatim in the prompt.
  prompt+=$'\n'"Only generate the raw text. Do not produce commentary or surrounding markdown."

  photo="$(take_photo)"

  generate_data "$prompt" "$photo" | sendchat | sed '/```/d'
}
#usage: file="$(tmpfile --suffix=.jpg)"
# Creates an empty temporary file with mktemp (all arguments are passed
# through to it) and prints the path on stdout without a trailing newline.
#
# The file is really created instead of only reserving a name with
# --dry-run, so nothing else can grab the path before it is used.
#
# Cleanup: an EXIT trap is installed only when the function runs in the main
# shell. Inside a command substitution the trap would belong to that
# short-lived subshell and fire before the caller ever used the file, so in
# that case removing the file is left to the caller.
tmpfile() {
  local file
  file="$(mktemp "$@")"

  if (( BASH_SUBSHELL == 0 )); then
    # The path is expanded now on purpose; %q keeps odd characters safe.
    #shellcheck disable=SC2064
    trap "rm -f -- $(printf '%q' "$file")" EXIT
  fi

  printf '%s' "$file"
}
# Captures one frame with fswebcam from $DEVICE at resolution $SIZE and
# prints it on stdout as a "data:image/jpeg;base64,..." URL (no trailing
# newline). Returns non-zero, with a note on stderr, when the capture or the
# encoding fails.
take_photo() {
  local file base64_image status=0

  file="$(tmpfile --suffix=.jpg)"

  {
    fswebcam -q -r "$SIZE" --jpeg 20 --no-banner -d "$DEVICE" "$file" &&
      # Guard against a capture that reported success but wrote no data.
      [[ -s "$file" ]] &&
      # Assigned separately from "local" so a base64 failure is not masked.
      base64_image="$(base64 -w 0 "$file")"
  } || status=$?

  # Remove the capture here: tmpfile ran inside a command substitution, so
  # an EXIT trap set there cannot outlive that subshell.
  rm -f -- "$file"

  if (( status != 0 )); then
    printf 'Could not capture a photo from %s\n' "$DEVICE" >&2
    return "$status"
  fi

  printf 'data:image/jpeg;base64,%s' "$base64_image"
}
# Builds the JSON request body for the chat completions call with jo and
# prints it on stdout.
#   $1    prompt text
#   $2    image URL (main passes a base64 data URL)
#   $3..  optional extra arguments handed to the outermost jo call
# The model defaults to gpt-4-vision-preview and can be overridden through
# the OPENAI_MODEL environment variable.
# NOTE(review): gpt-4-vision-preview was a preview model - confirm it is
# still served before relying on the default.
#shellcheck disable=SC2034,SC2276
generate_data() {
  local prompt url chat_model
  prompt="$1"; shift
  url="$1"; shift
  chat_model="${OPENAI_MODEL:-gpt-4-vision-preview}"

  jo \
    model="$chat_model" \
    'messages[]'="$(jo \
      'role'="user" \
      'content[]'="$(jo type=text text="$prompt")" \
      'content[]'="$(jo type=image_url image_url="$(jo url="$url")")"
    )" \
    max_tokens=3000 \
    "$@"
}
# Posts the JSON request body read from stdin to the OpenAI chat completions
# endpoint and prints the content of every returned choice on stdout,
# separated by a "----" line.
sendchat() {
  # Fail early with a clear message instead of sending an empty token.
  : "${OPENAI_API_KEY:?OPENAI_API_KEY not set}"

  # stdin/stdout will be request/response body.
  # The Authorization header is handed to curl as a file (-H @file) through
  # process substitution so the key never shows up in curl's argument list.
  curl -sSf -X POST \
    "https://api.openai.com/v1/chat/completions" \
    -H @<(printf 'Authorization: Bearer %s\n' "$OPENAI_API_KEY") \
    -H "Content-Type: application/json" \
    -H "Accept: application/json" \
    --max-time 180 --retry 5 --retry-delay 3 \
    --json @- |
    jq -r '[.choices[].message.content] | join("\n\n----\n\n")'
}
# Requires android, Termux, Termux::API, sshd, and "phone" host alias
#
# Takes a photo with the phone's camera over ssh, copies it to this machine,
# scales it to 512x512 with ffmpeg and prints it on stdout as a
# "data:image/jpeg;base64,..." URL (no trailing newline).
# All local intermediate files live in a private temporary directory that is
# removed again before returning, whether or not the steps succeeded.
take_android_photo() {
  local workdir image base64_image status=0

  workdir="$(mktemp -d)" || return

  {
    # $TMPDIR must be expanded on the phone, hence the single quotes.
    #shellcheck disable=SC2016
    image="$(ssh phone 'echo $TMPDIR/ai.jpg')" &&
      #shellcheck disable=SC2029
      ssh phone "termux-camera-photo -c 0 '$image'" &&
      scp "phone:$image" "$workdir/raw.jpg" &&
      # -nostdin keeps ffmpeg from reading (and waiting on) our stdin.
      ffmpeg -nostdin -i "$workdir/raw.jpg" -q:v 5 -vf scale=512:512 \
        "$workdir/scaled.jpg" &&
      # Assigned separately from "local" so a base64 failure is not masked.
      base64_image="$(base64 -w 0 "$workdir/scaled.jpg")"
  } || status=$?

  rm -rf -- "${workdir:?}"

  (( status == 0 )) || return "$status"

  printf 'data:image/jpeg;base64,%s' "$base64_image"
}
# Run the script, forwarding all command-line arguments to main.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment