mikeslattery · November 16, 2023 21:11
diff --git a/webcam2html.sh b/webcam2html.sh
 #!/bin/bash

 # Takes a photo and converts to a project text artifact.
 # Just experimental for now.

 # Strict mode: stop on errors, on unset variables and on failing pipelines.
 set -euo pipefail

 # Directory containing this script; the optional .env file is looked up here.
 THISDIR="$(dirname -- "$0")"

 # Load local settings (for example OPENAI_API_KEY) when a .env file exists.
 # Without one, values already present in the environment are used.
 if [[ -f "$THISDIR/.env" ]]; then
  #shellcheck disable=SC1091
  source "$THISDIR/.env"
 fi

 # Capture settings used by take_photo.  The defaults apply only when neither
 # the environment nor .env has set them, so another camera or resolution can
 # be selected without editing this file.
 : "${DEVICE:=/dev/video1}"
 : "${SIZE:=512x512}"

 # Verifies the runtime prerequisites and aborts with a message on the first
 # one that is missing.  As a side effect it defines the helpers die() and
 # requires(); bash function definitions are global once executed.
 checks() {
  # Print all arguments to stderr and terminate with status 1.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }
  # Abort unless the command named by $1 can be found.
  requires() {
    command -v "$1" &>/dev/null || die "You need to install $1"
  }

  requires fswebcam
  requires base64
  requires jq
  requires jo
  requires curl
  # shellcheck is invoked below, so it is checked like every other tool.
  requires shellcheck
  # ${VAR:-} keeps "set -u" from aborting with "unbound variable" before the
  # friendlier message can be printed.
  [[ -n "${OPENAI_API_KEY:-}" ]] || die "OPENAI_API_KEY not set"
  # Self-lint on every run; a non-zero exit from shellcheck stops the script
  # when "set -e" is in effect.
  shellcheck -x "$0"

  # Android requirements
  # requires ffmpeg
  # ssh phone 'command -v termux-camera-photo' &>/dev/null || die "Cannot use phone"
 }

 # Entry point: verifies prerequisites, captures a photo, asks the model to
 # turn it into text and prints the reply with every line containing a
 # Markdown code fence removed.
 main() {
  checks

  #TODO: prompt hardcoded for now.
  # prompt="Generate a Graphviz dot file based on the image."
  # prompt="Generate a PlantUML ERD file based on the image. And generate SQL DDL to build it."
  local prompt photo
  prompt="Generate an html form based on the image.  Style with Bulma."
  # $'\n' is a real newline.  "\n" inside double quotes stays a literal
  # backslash followed by "n" and would reach the model verbatim.
  prompt+=$'\n'"Only generate the raw text.  Do not produce commentary or surrounding markdown."

  photo="$(take_photo)"

  generate_data "$prompt" "$photo" | sendchat | sed '/```/d'
 }

 #usage: tmpfile --suffix=.jpg
 # Creates a temporary file (all arguments are handed to mktemp) and prints
 # its path without a trailing newline.
 # Cleanup: an EXIT trap only helps when it is installed in a shell that is
 # still running while the file is in use.  Called as "$(tmpfile ...)" this
 # function runs in a subshell whose EXIT trap would fire as soon as the
 # substitution returns, so in that case no trap is set and the caller has to
 # remove the file.  Called directly, the file is removed when the shell exits.
 tmpfile() {
  local path cleanup
  # Create the file for real: "mktemp --dry-run" merely prints a name and
  # leaves a window in which another process can create that path first.
  path="$(mktemp "$@")" || return
  if (( BASH_SUBSHELL == 0 )); then
    # %q quotes the path so it survives re-parsing when the trap runs.
    printf -v cleanup 'rm -f -- %q' "$path"
    #shellcheck disable=SC2064
    trap "$cleanup" EXIT
  fi
  printf '%s' "$path"
 }

 # Captures one frame from $DEVICE at resolution $SIZE with fswebcam and
 # prints it on stdout as a "data:image/jpeg;base64,..." URL (no trailing
 # newline).  Returns non-zero when the capture fails or yields no data.
 take_photo() {
  local file base64_image status=0
  # A private temp file that is removed below on every path; an EXIT trap set
  # inside a nested "$(...)" cannot outlive that subshell, so cleanup is
  # done explicitly here.
  file="$(mktemp --suffix=.jpg)" || return
  if fswebcam -q -r "$SIZE" --jpeg 20 --no-banner -d "$DEVICE" "$file" \
    && [[ -s "$file" ]]; then
    # Assigned separately from "local" so a base64 failure is not masked.
    base64_image="$(base64 -w 0 "$file")" || status=$?
  else
    status=1
  fi
  rm -f -- "$file"
  (( status == 0 )) || return "$status"
  printf '%s' "data:image/jpeg;base64,${base64_image}"
 }

 # Builds the JSON request body for the chat-completions call with jo and
 # prints it on stdout.
 #   $1     prompt text
 #   $2     image URL (main passes the data: URL produced by take_photo)
 #   $3...  extra jo arguments appended to the top-level object
 #shellcheck disable=SC2034,SC2276
 generate_data() {
  # Locals, so same-named variables of the caller are left untouched.
  local prompt url
  prompt="$1"; shift
  url="$1"; shift

  jo \
    model="gpt-4-vision-preview" \
    'messages[]'="$(jo \
      'role'="user" \
      'content[]'="$(jo type=text text="$prompt")" \
      'content[]'="$(jo type=image_url image_url="$(jo url="$url")")"
    )" \
    max_tokens=3000 \
    "$@"
 }

 # POSTs the JSON request body read from stdin to the chat-completions
 # endpoint and prints the message content of every returned choice, joined
 # by "----" separator lines.  Reads the API key from OPENAI_API_KEY.
 sendchat() {
  # stdin/stdout will be request/response body.
  # The Authorization header is handed to curl through a file descriptor
  # ("-H @file") rather than on the command line, so the key does not show up
  # in process listings.  printf is a shell builtin, so it adds no argv entry.
  curl -sSf -X POST \
    "https://api.openai.com/v1/chat/completions" \
    -H @<(printf 'Authorization: Bearer %s\n' "$OPENAI_API_KEY") \
    -H "Content-Type: application/json" \
    -H "Accept: application/json" \
    --max-time 180 --retry 5 --retry-delay 3 \
    --json @- | \
    jq '[.choices[].message.content] | join("\n\n----\n\n")' -r
 }

 # Requires android, Termux, Termux::API, sshd, and "phone" host alias
 # Takes a photo with the phone's camera over ssh, copies it here, rescales it
 # with ffmpeg and prints it on stdout as a "data:image/jpeg;base64,..." URL
 # (no trailing newline).  Local intermediates live in a private temp
 # directory that is removed before returning, so nothing is left in the
 # working directory.  Returns non-zero if any step fails.
 take_android_photo() {
  local workdir image base64_image scale status=0
  # Same geometry as the webcam path: SIZE is "WxH", ffmpeg wants "W:H".
  scale="${SIZE:-512x512}"
  scale="${scale/x/:}"

  workdir="$(mktemp -d)" || return

  #shellcheck disable=SC2029
  {
    # $TMPDIR is expanded on the phone, hence the single quotes.
    image="$(ssh phone 'echo $TMPDIR/ai.jpg')" &&
      ssh phone "termux-camera-photo -c 0 '$image'" &&
      scp "phone:$image" "$workdir/raw.jpg" &&
      # -nostdin: keep ffmpeg from reading this script's standard input.
      ffmpeg -nostdin -i "$workdir/raw.jpg" -q:v 5 -vf "scale=$scale" \
        "$workdir/scaled.jpg" &&
      # Assigned separately from "local" so a base64 failure is not masked.
      base64_image="$(base64 -w 0 "$workdir/scaled.jpg")"
  } || status=$?

  rm -rf -- "$workdir"
  (( status == 0 )) || return "$status"
  printf '%s' "data:image/jpeg;base64,${base64_image}"
 }

 # Run the script; all command-line arguments are passed through to main.
 main "$@"
	#!/bin/bash

	# Takes a photo and converts to a project text artifact.
	# Just experimental for now.

	# Strict mode: stop on errors, on unset variables and on failing pipelines.
	set -euo pipefail

	# Directory containing this script; the optional .env file is looked up here.
	THISDIR="$(dirname -- "$0")"

	# Load local settings (for example OPENAI_API_KEY) when a .env file exists.
	# Without one, values already present in the environment are used.
	if [[ -f "$THISDIR/.env" ]]; then
	  #shellcheck disable=SC1091
	  source "$THISDIR/.env"
	fi

	# Capture settings used by take_photo.  The defaults apply only when neither
	# the environment nor .env has set them, so another camera or resolution can
	# be selected without editing this file.
	: "${DEVICE:=/dev/video1}"
	: "${SIZE:=512x512}"

	# Verifies the runtime prerequisites and aborts with a message on the first
	# one that is missing.  As a side effect it defines the helpers die() and
	# requires(); bash function definitions are global once executed.
	checks() {
	  # Print all arguments to stderr and terminate with status 1.
	  die() {
	    printf '%s\n' "$*" >&2
	    exit 1
	  }
	  # Abort unless the command named by $1 can be found.
	  requires() {
	    command -v "$1" &>/dev/null || die "You need to install $1"
	  }

	  requires fswebcam
	  requires base64
	  requires jq
	  requires jo
	  requires curl
	  # shellcheck is invoked below, so it is checked like every other tool.
	  requires shellcheck
	  # ${VAR:-} keeps "set -u" from aborting with "unbound variable" before the
	  # friendlier message can be printed.
	  [[ -n "${OPENAI_API_KEY:-}" ]] || die "OPENAI_API_KEY not set"
	  # Self-lint on every run; a non-zero exit from shellcheck stops the script
	  # when "set -e" is in effect.
	  shellcheck -x "$0"

	  # Android requirements
	  # requires ffmpeg
	  # ssh phone 'command -v termux-camera-photo' &>/dev/null || die "Cannot use phone"
	}

	# Entry point: verifies prerequisites, captures a photo, asks the model to
	# turn it into text and prints the reply with every line containing a
	# Markdown code fence removed.
	main() {
	  checks

	  #TODO: prompt hardcoded for now.
	  # prompt="Generate a Graphviz dot file based on the image."
	  # prompt="Generate a PlantUML ERD file based on the image. And generate SQL DDL to build it."
	  local prompt photo
	  prompt="Generate an html form based on the image. Style with Bulma."
	  # $'\n' is a real newline.  "\n" inside double quotes stays a literal
	  # backslash followed by "n" and would reach the model verbatim.
	  prompt+=$'\n'"Only generate the raw text. Do not produce commentary or surrounding markdown."

	  photo="$(take_photo)"

	  generate_data "$prompt" "$photo" | sendchat | sed '/```/d'
	}

	#usage: tmpfile --suffix=.jpg
	# Creates a temporary file (all arguments are handed to mktemp) and prints
	# its path without a trailing newline.
	# Cleanup: an EXIT trap only helps when it is installed in a shell that is
	# still running while the file is in use.  Called as "$(tmpfile ...)" this
	# function runs in a subshell whose EXIT trap would fire as soon as the
	# substitution returns, so in that case no trap is set and the caller has to
	# remove the file.  Called directly, the file is removed when the shell exits.
	tmpfile() {
	  local path cleanup
	  # Create the file for real: "mktemp --dry-run" merely prints a name and
	  # leaves a window in which another process can create that path first.
	  path="$(mktemp "$@")" || return
	  if (( BASH_SUBSHELL == 0 )); then
	    # %q quotes the path so it survives re-parsing when the trap runs.
	    printf -v cleanup 'rm -f -- %q' "$path"
	    #shellcheck disable=SC2064
	    trap "$cleanup" EXIT
	  fi
	  printf '%s' "$path"
	}

	# Captures one frame from $DEVICE at resolution $SIZE with fswebcam and
	# prints it on stdout as a "data:image/jpeg;base64,..." URL (no trailing
	# newline).  Returns non-zero when the capture fails or yields no data.
	take_photo() {
	  local file base64_image status=0
	  # A private temp file that is removed below on every path; an EXIT trap set
	  # inside a nested "$(...)" cannot outlive that subshell, so cleanup is
	  # done explicitly here.
	  file="$(mktemp --suffix=.jpg)" || return
	  if fswebcam -q -r "$SIZE" --jpeg 20 --no-banner -d "$DEVICE" "$file" \
	    && [[ -s "$file" ]]; then
	    # Assigned separately from "local" so a base64 failure is not masked.
	    base64_image="$(base64 -w 0 "$file")" || status=$?
	  else
	    status=1
	  fi
	  rm -f -- "$file"
	  (( status == 0 )) || return "$status"
	  printf '%s' "data:image/jpeg;base64,${base64_image}"
	}

	# Builds the JSON request body for the chat-completions call with jo and
	# prints it on stdout.
	#   $1     prompt text
	#   $2     image URL (main passes the data: URL produced by take_photo)
	#   $3...  extra jo arguments appended to the top-level object
	#shellcheck disable=SC2034,SC2276
	generate_data() {
	  # Locals, so same-named variables of the caller are left untouched.
	  local prompt url
	  prompt="$1"; shift
	  url="$1"; shift

	  jo \
	    model="gpt-4-vision-preview" \
	    'messages[]'="$(jo \
	      'role'="user" \
	      'content[]'="$(jo type=text text="$prompt")" \
	      'content[]'="$(jo type=image_url image_url="$(jo url="$url")")"
	    )" \
	    max_tokens=3000 \
	    "$@"
	}

	# POSTs the JSON request body read from stdin to the chat-completions
	# endpoint and prints the message content of every returned choice, joined
	# by "----" separator lines.  Reads the API key from OPENAI_API_KEY.
	sendchat() {
	  # stdin/stdout will be request/response body.
	  # The Authorization header is handed to curl through a file descriptor
	  # ("-H @file") rather than on the command line, so the key does not show up
	  # in process listings.  printf is a shell builtin, so it adds no argv entry.
	  curl -sSf -X POST \
	    "https://api.openai.com/v1/chat/completions" \
	    -H @<(printf 'Authorization: Bearer %s\n' "$OPENAI_API_KEY") \
	    -H "Content-Type: application/json" \
	    -H "Accept: application/json" \
	    --max-time 180 --retry 5 --retry-delay 3 \
	    --json @- | \
	    jq '[.choices[].message.content] | join("\n\n----\n\n")' -r
	}

	# Requires android, Termux, Termux::API, sshd, and "phone" host alias
	# Takes a photo with the phone's camera over ssh, copies it here, rescales it
	# with ffmpeg and prints it on stdout as a "data:image/jpeg;base64,..." URL
	# (no trailing newline).  Local intermediates live in a private temp
	# directory that is removed before returning, so nothing is left in the
	# working directory.  Returns non-zero if any step fails.
	take_android_photo() {
	  local workdir image base64_image scale status=0
	  # Same geometry as the webcam path: SIZE is "WxH", ffmpeg wants "W:H".
	  scale="${SIZE:-512x512}"
	  scale="${scale/x/:}"

	  workdir="$(mktemp -d)" || return

	  #shellcheck disable=SC2029
	  {
	    # $TMPDIR is expanded on the phone, hence the single quotes.
	    image="$(ssh phone 'echo $TMPDIR/ai.jpg')" &&
	      ssh phone "termux-camera-photo -c 0 '$image'" &&
	      scp "phone:$image" "$workdir/raw.jpg" &&
	      # -nostdin: keep ffmpeg from reading this script's standard input.
	      ffmpeg -nostdin -i "$workdir/raw.jpg" -q:v 5 -vf "scale=$scale" \
	        "$workdir/scaled.jpg" &&
	      # Assigned separately from "local" so a base64 failure is not masked.
	      base64_image="$(base64 -w 0 "$workdir/scaled.jpg")"
	  } || status=$?

	  rm -rf -- "$workdir"
	  (( status == 0 )) || return "$status"
	  printf '%s' "data:image/jpeg;base64,${base64_image}"
	}

	# Run the script; all command-line arguments are passed through to main.
	main "$@"