Created
May 12, 2025 00:00
-
-
Save melastmohican/383c3f92029eec269b72b611011fb83c to your computer and use it in GitHub Desktop.
Generate title, caption and keywords from attached image using Gemini
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env lua | |
local http = require("socket.http") | |
local ltn12 = require("ltn12") | |
local mime = require("mime") -- For base64 | |
local json = require("dkjson") -- dkjson is a JSON library for Lua | |
-- Load and encode image

--- Read a file in binary mode and return its contents Base64-encoded.
-- Raises an error when the file cannot be opened or read.
-- @tparam string path file to read
-- @treturn string Base64 text of the file contents ("" for an empty file)
local function read_image_base64(path)
  local file = assert(io.open(path, "rb"))
  local content, read_err = file:read("*a")
  -- Close before any error is raised so the handle is never leaked.
  file:close()
  if not content then
    error(("cannot read %s: %s"):format(tostring(path), tostring(read_err)))
  end
  -- mime.b64 returns two values (encoded text, leftover); the `or` expression
  -- keeps only the first. NOTE(review): LuaSocket appears to yield nil rather
  -- than "" for empty input, so normalise to a string for the caller.
  return mime.b64(content) or ""
end
-- Instruction text sent to the model together with the image. It asks for a
-- title, a caption and a keyword list, returned as a JSON object.
-- The string is sent verbatim, so it must not contain stray layout characters.
local prompt_text = [[
You are a professional photography analyst with expertise in object recognition and computer-generated image description.
You also try to identify famous buildings and landmarks as well as the location where the photo was taken.
Furthermore, you aim to specify animal and plant species as accurately as possible. Always give common name followed by the scientific name in brackets e.g (Beta vulgaris).
You also describe objects—such as vehicle types and manufacturers—as specifically as you can.
Analyze the uploaded photo and generate the following data:
* Keywords (comma-separated list of 50 single-word keywords)
* Image title
* Image caption (Maximum 200 characters)
Make sure the result is in JSON format:
{
"title": "",
"caption": "",
"keywords": "key1,key2"
}
]]
-- Setup request payload

--- Build the JSON request body: one user turn holding the prompt text and
-- the inline image, plus generation and safety settings.
-- @tparam string image_base64 Base64-encoded image bytes
-- @tparam[opt="image/jpeg"] string mime_type MIME type declared for the image
-- @treturn string whatever json.encode returns for the request table
local function build_payload(image_base64, mime_type)
  local user_turn = {
    role = "user",
    parts = {
      { text = prompt_text },
      {
        inlineData = {
          -- Defaults to JPEG so existing single-argument callers are unchanged.
          mimeType = mime_type or "image/jpeg",
          data = image_base64
        }
      }
    }
  }

  return json.encode({
    contents = { user_turn },
    generationConfig = {
      temperature = 1,
      topP = 0.95,
      maxOutputTokens = 8192,
      responseModalities = { "TEXT" }
    },
    -- Every listed harm category is set to BLOCK_NONE.
    safetySettings = {
      { category = "HARM_CATEGORY_HATE_SPEECH", threshold = "BLOCK_NONE" },
      { category = "HARM_CATEGORY_DANGEROUS_CONTENT", threshold = "BLOCK_NONE" },
      { category = "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold = "BLOCK_NONE" },
      { category = "HARM_CATEGORY_HARASSMENT", threshold = "BLOCK_NONE" }
    }
  })
end
-- Make API call

--- Join every `parts[].text` found in the first candidate of each chunk.
-- @tparam table chunks sequence of decoded response objects
-- @treturn string the concatenated text ("" when no chunk carries text)
local function collect_text(chunks)
  local pieces = {}
  for _, chunk in ipairs(chunks) do
    local candidate = chunk.candidates and chunk.candidates[1]
    local parts = candidate and candidate.content and candidate.content.parts
    if parts then
      for _, part in ipairs(parts) do
        if part.text then
          pieces[#pieces + 1] = part.text
        end
      end
    end
  end
  -- A buffer plus table.concat avoids quadratic string rebuilding.
  return table.concat(pieces)
end

--- Send one image to the Gemini endpoint and print the generated text.
-- The request body comes from build_payload, which also decides the MIME
-- type declared for the image, so the file must match that type.
-- @tparam[opt="20250506-153543.jpg"] string image_path image file to analyse
-- @tparam[opt] string api_key API key; falls back to the GEMINI_API_KEY
--   environment variable, then to the literal "{your_api_key}" placeholder
-- @treturn[1] string combined model text with Markdown code fences removed
-- @treturn[2] nil
-- @treturn[2] string description of the failure
local function generate(image_path, api_key)
  image_path = image_path or "20250506-153543.jpg"
  api_key = api_key or os.getenv("GEMINI_API_KEY") or "{your_api_key}"

  local image_base64 = read_image_base64(image_path)
  local payload = build_payload(image_base64)

  local response = {}
  -- The second result is compared with 200 below; on a transport failure
  -- LuaSocket is documented to return nil plus an error message instead.
  local _, status = http.request{
    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:streamGenerateContent?key=" .. api_key,
    method = "POST",
    headers = {
      ["Content-Type"] = "application/json",
      ["Content-Length"] = tostring(#payload)
    },
    source = ltn12.source.string(payload),
    sink = ltn12.sink.table(response)
  }
  local body = table.concat(response)

  if status ~= 200 then
    print("Request failed with status:", status)
    -- The body usually explains the failure; show it instead of dropping it.
    if body ~= "" then
      print(body)
    end
    return nil, "request failed: " .. tostring(status)
  end

  local decoded, _, err = json.decode(body)
  if not decoded then
    print("JSON decode error:", err)
    return nil, "JSON decode error: " .. tostring(err)
  end
  if type(decoded) ~= "table" then
    print("Unexpected response:", body)
    return nil, "unexpected response shape"
  end

  -- The body is treated as an array of chunks; a single object that carries
  -- `candidates` itself is wrapped so it is handled the same way.
  local chunks = decoded.candidates and { decoded } or decoded

  -- Strip Markdown code fences that may wrap the JSON answer.
  local combined_text = collect_text(chunks)
  combined_text = string.gsub(combined_text, '```json', '')
  combined_text = string.gsub(combined_text, '```', '')
  print("Combined output:\n", combined_text)
  return combined_text
end

-- Optional first command-line argument selects the image file.
generate(arg and arg[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment