Last active
September 7, 2024 00:13
-
-
Save Saruspete/6270469f622140a08f74e23ec28045a6 to your computer and use it in GitHub Desktop.
A Lua script for the Nextcloud files_scripts app that runs OCR with Tesseract on an image and stores the recognized text as a comment on the file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- OCR-to-comment script for the Nextcloud files_scripts app.
--
-- Flow:
--   1. Take the first input file and make sure it is an image.
--   2. Run `tesseract <path> stdout` on its local path.
--   3. Sanitize the recognized text and attach it to the file as a comment.
--   4. Remove the "run:ocrimg" tag from the file if it carries it.
--
-- Every failure is reported through abort().

local OCR_TAG_NAME = "run:ocrimg"
local COMMENT_MAX_LENGTH = 1000

local inputFiles = get_input_files()
local fileObj = inputFiles and inputFiles[1]
if fileObj == nil or not is_file(fileObj) then
  abort("No file name was provided")
end

local fileMeta = meta_data(fileObj)
if fileMeta == nil then
  abort("Failed to read metadata of file " .. tostring(fileObj.name))
end

-- Anchor the pattern so only a leading "image/" counts as an image type.
local mimetype = fileMeta.mimetype or ""
if not string.find(mimetype, "^image/") then
  abort("File " .. fileObj.name .. " is not an image (mimetype:" .. mimetype .. ")")
end

local filePath = fileMeta.local_path
local fileOwner = fileMeta.owner_id
if filePath == nil then
  abort("File " .. fileObj.name .. " has no local path")
end

-- Run OCR cmd
-- The path goes inside single quotes, so every embedded single quote is
-- rewritten as '\'' to keep the shell from ending the quoted string early.
-- The extra parentheses drop gsub's second return value (the match count).
local quotedPath = "'" .. (filePath:gsub("'", "'\\''")) .. "'"
local tesseractCmd = "tesseract " .. quotedPath .. " stdout"
local tesseractRes = shell_command(tesseractCmd)
if tesseractRes == nil then
  -- Nothing to index here: report the failure without touching the result.
  abort("Failed to execute command '" .. tesseractCmd .. "'. No result was returned")
end
if tesseractRes.exit_code ~= nil and tesseractRes.exit_code ~= 0 then
  abort("Failed to execute command '" .. tesseractCmd .. "'. Error:"
    .. tostring(tesseractRes.errors) .. " Code:" .. tostring(tesseractRes.exit_code))
end

-- ----------------------------------------------------------------
-- Clean the comment (remove non-visible & special chars, and trim message)
local comment = tesseractRes.output or ""
-- In a Lua character set the hyphen is escaped with "%", not with "\".
comment = comment:gsub("[^A-Za-z0-9_+%- ]", " ")
-- Newlines and form feeds were just turned into spaces: collapse the runs and
-- strip both ends so that blank OCR output really becomes the empty string.
comment = comment:gsub(" +", " ")
comment = comment:match("^ ?(.-) ?$")
comment = comment:sub(1, COMMENT_MAX_LENGTH)

-- only add comment if tesseract found something
if comment ~= "" then
  -- Workaround when script is run from occ, users_find() returns nil. Using file owner in that case
  local users = users_find()
  if users == nil or users[1] == nil then
    users = users_find(nil, fileOwner)
  end

  local commentUser = users and users[1]
  if commentUser == nil then
    abort("Failed to find a user to create the comment on file '" .. fileObj.name .. "'")
  end

  local commentOpt = { unsafe_impersonate_user = commentUser }
  local commentRes = comment_create(comment, fileObj, commentOpt)
  if commentRes == nil then
    abort("Failed to add comment '" .. comment .. "' on file '" .. fileObj.name
      .. "' (cmd: '" .. tesseractCmd .. "', stderr: '" .. tostring(tesseractRes.errors)
      .. "', return:" .. tostring(tesseractRes.exit_code) .. ")")
  end
end

-- Remove tag if triggered from it
local fileTags = get_file_tags(fileObj)
if fileTags ~= nil then
  for _, tag in ipairs(fileTags) do
    if tag.name == OCR_TAG_NAME then
      if tag_file_unassign(fileObj, tag) == false then
        abort("Failed to unassign tag " .. OCR_TAG_NAME)
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment