Skip to content

Instantly share code, notes, and snippets.

@NamanGoyalK
Created November 4, 2025 05:59
Show Gist options
  • Save NamanGoyalK/eee02509125fd0fd56a0d7db856a45f8 to your computer and use it in GitHub Desktop.
Save NamanGoyalK/eee02509125fd0fd56a0d7db856a45f8 to your computer and use it in GitHub Desktop.
AI Assistant for Hyprland - OCR, Image Analysis & Text Explanation using Gemini API
#!/bin/bash
# ================================================
# Hyprland AI Assistant
# ================================================
# A smart AI assistant for Hyprland/Wayland that provides:
# - OCR text extraction and analysis
# - Image analysis with AI vision
# - Quick explanations for selected text
#
# Powered by Google Gemini API
#
# DEPENDENCIES:
# - grim, slurp (screenshot tools)
# - tesseract (OCR)
# - jq (JSON parsing)
# - wl-clipboard (clipboard access)
# - zenity (GUI dialogs)
# - curl (API requests)
# - imagemagick (image compression)
#
# INSTALLATION:
# 1. Install dependencies:
# sudo pacman -S grim slurp tesseract jq wl-clipboard zenity curl imagemagick
#
# 2. Get a Gemini API key:
# https://makersuite.google.com/app/apikey
#
# 3. Add your API key to the script (the API_KEY variable right below this header)
#
# 4. Make executable:
# chmod +x hyprland-ai-assistant.sh
#
# 5. (Optional) Bind to a keybind in Hyprland config:
# bind = $mainMod, A, exec, /path/to/hyprland-ai-assistant.sh
#
# USAGE:
# Run the script, choose your mode, and let AI do the work!
# ================================================
# ------------------------------------------------
# Configuration
# ------------------------------------------------
# TODO: Add your Gemini API key here (put it after the ":-" below), or export
# GEMINI_API_KEY in the environment that launches this script so the key does
# not have to live in the file.
API_KEY="${GEMINI_API_KEY:-}"

# Both modes currently use the same model; the two URL variables are kept
# separate so text and vision requests can be pointed at different models.
GEMINI_ENDPOINT="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"
API_URL_TEXT="${GEMINI_ENDPOINT}?key=${API_KEY}"
API_URL_VISION="${GEMINI_ENDPOINT}?key=${API_KEY}"

# Scratch files: the captured screenshot and the last query/answer pair.
TEMP_IMG="/tmp/ai_query.png"
TEMP_RESPONSE="/tmp/ai_response.txt"
# Refuse to run without an API key: every request would be rejected anyway.
[[ -n "$API_KEY" ]] || {
  notify-send "AI Assistant" "Error: API_KEY is not set! Edit the script and add your Gemini API key." -u critical
  exit 1
}
# Check for required dependencies.
# Each entry is "command:package": the command is what gets probed, the package
# is what the user has to install to obtain it (Arch package names).
REQUIRED_TOOLS=(
  "grim:grim"
  "slurp:slurp"
  "tesseract:tesseract"
  "jq:jq"
  "wl-paste:wl-clipboard"
  "wl-copy:wl-clipboard"
  "zenity:zenity"
  "curl:curl"
  "base64:coreutils"
  "file:file"
  "convert:imagemagick"
  "notify-send:libnotify"
)

MISSING_DEPS=()
for entry in "${REQUIRED_TOOLS[@]}"; do
  tool=${entry%%:*}
  pkg=${entry#*:}
  command -v "$tool" &> /dev/null && continue
  # Several commands can come from one package; list each package only once.
  [[ " ${MISSING_DEPS[*]} " == *" ${pkg} "* ]] || MISSING_DEPS+=("$pkg")
done

if (( ${#MISSING_DEPS[@]} != 0 )); then
  DEPS_LIST=$(printf ", %s" "${MISSING_DEPS[@]}")
  DEPS_LIST=${DEPS_LIST:2}
  # The readable list is comma-separated, but pacman wants its targets
  # separated by spaces, so the install hint uses the array joined by spaces.
  INSTALL_HINT="sudo pacman -S ${MISSING_DEPS[*]}"
  # Also report on stderr: notify-send itself may be the missing tool.
  printf 'Missing dependencies: %s\nInstall with: %s\n' "$DEPS_LIST" "$INSTALL_HINT" >&2
  notify-send "AI Assistant" "Missing dependencies: ${DEPS_LIST}\n\nInstall with: ${INSTALL_HINT}" -u critical
  exit 1
fi
# Ask which mode to run. Without zenity there is no way to ask, so OCR is
# picked automatically.
MENU_OCR="1. OCR Text (Screenshot → Extract & Analyze)"
if ! command -v zenity > /dev/null 2>&1; then
  notify-send "AI Assistant" "zenity not found, defaulting to OCR..." -t 2000
  CHOICE="$MENU_OCR"
else
  CHOICE=$(
    zenity --list \
      --title="AI Assistant" \
      --text="What do you want to do?" \
      --column="Option" \
      "$MENU_OCR" \
      "2. Image Analysis (Screenshot → AI Vision)" \
      "3. Explain Selected Text (From Clipboard)" \
      --width=500 --height=300
  )
fi

# A dismissed dialog yields an empty selection; treat that (or any value
# containing "Cancel") as a request to stop.
case "$CHOICE" in
  "" | *Cancel*)
    notify-send "AI Assistant" "Cancelled" -u low
    exit 0
    ;;
esac
# ------------------------------------------------
# Mode dispatch
# ------------------------------------------------
# Runs the mode selected in CHOICE. On success RESPONSE holds the raw JSON
# reply from the API and QUERY holds the text (or a label) describing what was
# asked. Every failure path notifies the user and exits with status 1.

# Squeeze runs of spaces and trim whitespace from both ends of each line of $1.
normalize_text() {
  printf '%s\n' "$1" | tr -s ' ' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//'
}

# Print the prompt to send for text $1, chosen from its length and content:
# a single word is defined, up to three words are explained, text that looks
# like an error or like code gets a matching instruction, and anything else is
# passed through as the question itself.
build_text_prompt() {
  local query=$1
  local suffix=" Respond in plain text only, no markdown, no formatting, no asterisks or special characters. Just give me the facts."
  local word_count
  word_count=$(wc -w <<< "$query")

  # The line breaks come from the printf format: "\n" inside a double-quoted
  # bash string is two literal characters and would reach the model verbatim.
  if (( word_count == 1 )); then
    printf 'Define "%s" - include part of speech and an example.%s' "$query" "$suffix"
  elif (( word_count <= 3 )); then
    printf 'What does "%s" mean?%s' "$query" "$suffix"
  elif grep -qiE "error|exception|traceback|failed|warning" <<< "$query"; then
    printf 'Explain this error and how to fix it:\n\n%s\n\n%s' "$query" "$suffix"
  elif grep -qE "^(def|function|class|import|const|let|var|public|private|int|void|\{|\})" <<< "$query"; then
    printf 'Give the output first, then explain this code:\n\n%s\n\n%s' "$query" "$suffix"
  else
    printf '%s\n\n%s' "$query" "$suffix"
  fi
}

# POST the JSON document read from stdin to endpoint $1.
# Sets the globals RESPONSE (reply body) and HTTP_CODE. On any status other
# than 200 it notifies the user, appends the details to /tmp/ai_debug.log and
# returns 1. Feed it with a redirection, not a pipe, so the globals are set in
# the calling shell.
gemini_post() {
  local endpoint=$1
  local reply
  reply=$(curl -s -w "\n%{http_code}" -X POST "$endpoint" \
    -H "Content-Type: application/json" \
    --data-binary @-)
  # curl appends the status code on its own final line; split it off.
  HTTP_CODE=${reply##*$'\n'}
  RESPONSE=${reply%$'\n'*}
  if [[ "$HTTP_CODE" != "200" ]]; then
    notify-send "AI Assistant" "API Error (HTTP $HTTP_CODE)\nCheck your API key or quota" -u critical
    {
      echo "HTTP Error: $HTTP_CODE"
      echo "$RESPONSE"
    } >> /tmp/ai_debug.log
    return 1
  fi
}

# Build a prompt for text $1, show progress message $2 and query the text
# endpoint. Returns the status of gemini_post.
ask_about_text() {
  PROMPT=$(build_text_prompt "$1")
  notify-send "AI Assistant" "$2" -t 2000
  # The prompt is streamed to jq on stdin rather than passed as an argument so
  # that very long text cannot hit the kernel's per-argument size limit.
  JSON_PAYLOAD=$(printf '%s' "$PROMPT" | jq -Rs '{contents: [{parts: [{text: .}]}]}')
  gemini_post "$API_URL_TEXT" <<< "$JSON_PAYLOAD"
}

# Handle clipboard text mode
if [[ "$CHOICE" == *"Explain Selected Text"* ]]; then
  notify-send "AI Assistant" "Getting clipboard text..." -t 1000
  # Normalize before the emptiness test so whitespace-only content is rejected.
  QUERY=$(normalize_text "$(wl-paste 2> /dev/null)")
  if [[ -z "$QUERY" ]]; then
    notify-send "AI Assistant" "Nothing in clipboard! Select some text first." -u critical
    exit 1
  fi
  ask_about_text "$QUERY" "Processing..." || exit 1

# OCR mode - grab screenshot and extract text
elif [[ "$CHOICE" == *"OCR"* ]]; then
  grim -g "$(slurp)" "$TEMP_IMG" 2> /dev/null || {
    notify-send "AI Assistant" "Screenshot cancelled" -u low
    exit 1
  }
  # Tesseract can emit a lone page separator for an image without text, so the
  # output is normalized before it is tested for emptiness.
  QUERY=$(normalize_text "$(tesseract "$TEMP_IMG" - -l eng 2> /dev/null)")
  # The screenshot is only needed for OCR; remove it on every path from here.
  rm -f "$TEMP_IMG"
  if [[ -z "$QUERY" ]]; then
    notify-send "AI Assistant" "Couldn't find any text!" -u critical
    exit 1
  fi
  ask_about_text "$QUERY" "Processing text..." || exit 1

# Image analysis mode
else
  grim -g "$(slurp)" "$TEMP_IMG" 2> /dev/null || {
    notify-send "AI Assistant" "Screenshot cancelled" -u low
    exit 1
  }
  notify-send "AI Assistant" "Analyzing image..." -t 2000

  TEMP_COMPRESSED="/tmp/ai_query_compressed.jpg"
  TEMP_JSON="/tmp/ai_payload.json"

  # Compress image before sending to API (saves tokens/cost).
  # A leftover file from an earlier run must not be mistaken for this run's
  # output, so it is removed first and the result of convert is checked.
  rm -f "$TEMP_COMPRESSED"
  if convert "$TEMP_IMG" -resize '1280x1280>' -quality 85 "$TEMP_COMPRESSED" \
    && [[ -s "$TEMP_COMPRESSED" ]]; then
    IMAGE_FILE="$TEMP_COMPRESSED"
  else
    IMAGE_FILE="$TEMP_IMG"
  fi
  IMAGE_BASE64=$(base64 -w 0 "$IMAGE_FILE")
  MIME_TYPE=$(file --mime-type -b "$IMAGE_FILE")

  # ask for optional instructions
  if command -v zenity &> /dev/null; then
    USER_PROMPT=$(zenity --entry \
      --title="AI Assistant - Image Analysis" \
      --text="Any specific instructions? (optional)" \
      --width=500)
  else
    USER_PROMPT=""
  fi
  if [[ -z "$USER_PROMPT" ]]; then
    USER_PROMPT="Analyze this image. Describe everything you see - text, objects, diagrams, whatever's there. If there's text, transcribe it. If it's a chart or diagram, explain it. Keep it plain text, no markdown."
  else
    USER_PROMPT="${USER_PROMPT} Keep response in plain text, no markdown."
  fi

  # debug stuff (this mode starts a fresh log)
  {
    echo "Original image: $TEMP_IMG"
    echo "Compressed image: $IMAGE_FILE"
    echo "Image size: ${#IMAGE_BASE64}"
    echo "MIME: $MIME_TYPE"
    echo "Prompt: $USER_PROMPT"
  } > /tmp/ai_debug.log

  # Let jq build the payload so quotes, backslashes and newlines typed by the
  # user are escaped correctly. The image data goes through stdin because it
  # is usually too large to pass as a command-line argument.
  printf '%s' "$IMAGE_BASE64" | jq -Rs \
    --arg prompt "$USER_PROMPT" \
    --arg mime "$MIME_TYPE" \
    '{
      contents: [{
        parts: [
          {text: $prompt},
          {inline_data: {mime_type: $mime, data: .}}
        ]
      }]
    }' > "$TEMP_JSON"
  echo "JSON payload: $(wc -c < "$TEMP_JSON") bytes" >> /tmp/ai_debug.log

  gemini_post "$API_URL_VISION" < "$TEMP_JSON" || {
    rm -f "$TEMP_JSON" "$TEMP_IMG" "$TEMP_COMPRESSED"
    exit 1
  }
  rm -f "$TEMP_JSON" "$TEMP_IMG" "$TEMP_COMPRESSED"
  QUERY="[Image Analysis]"
fi
# Record the raw reply in the debug log, then extract the answer text from it.
{
  echo ""
  echo "Raw API response:"
  echo "$RESPONSE"
} >> /tmp/ai_debug.log

AI_ANSWER=$(jq -r '.candidates[0].content.parts[0].text' <<< "$RESPONSE" 2> /dev/null)

# jq prints "null" when the path is absent and nothing at all when the reply
# is not valid JSON; both mean there is no answer to show.
if [[ -z "$AI_ANSWER" || "$AI_ANSWER" == "null" ]]; then
  # Prefer the API's own error message when the reply carries one.
  ERROR_MSG=$(jq -r '.error.message' <<< "$RESPONSE" 2> /dev/null)
  if [[ -z "$ERROR_MSG" || "$ERROR_MSG" == "null" ]]; then
    notify-send "AI Assistant" "Failed to get response - check /tmp/ai_debug.log" -u critical
    echo "Error: couldn't extract answer" >> /tmp/ai_debug.log
  else
    notify-send "AI Assistant" "API Error: $ERROR_MSG" -u critical
    echo "API Error: $ERROR_MSG" >> /tmp/ai_debug.log
  fi
  exit 1
fi
# Write the query and the answer to the response file; the viewers below read
# from it.
{
  echo "Query: $QUERY"
  echo ""
  echo "$AI_ANSWER"
} > "$TEMP_RESPONSE"

RESPONSE_LENGTH=${#AI_ANSWER}

# Answers under 150 characters are shown as a notification; longer ones open
# in a window (zenity first, then kitty, otherwise only the file path is
# announced).
if (( RESPONSE_LENGTH < 150 )); then
  notify-send "AI Assistant" "$AI_ANSWER" -t 5000
elif command -v zenity &> /dev/null; then
  # zenity exits 0 only when the dialog is confirmed, which is taken as a
  # request to copy the text.
  if zenity --text-info \
    --title="AI Assistant" \
    --width=800 \
    --height=600 \
    --font="JetBrainsMonoNF-Regular 16" \
    --filename="$TEMP_RESPONSE" \
    --checkbox="Copy to clipboard"; then
    if command -v wl-copy &> /dev/null; then
      wl-copy < "$TEMP_RESPONSE"
      notify-send "AI Assistant" "Copied to clipboard!" -t 2000
    else
      notify-send "AI Assistant" "wl-clipboard not installed (sudo pacman -S wl-clipboard)" -u normal
    fi
  fi
elif command -v kitty &> /dev/null; then
  kitty -1 -e sh -c "cat '$TEMP_RESPONSE' | less -R" &
else
  notify-send "AI Assistant" "Response saved to $TEMP_RESPONSE" -t 5000
fi
exit 0
@ArtificialXDev
Copy link

Nice!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment