Last active
December 15, 2015 02:01
-
-
Save florianeckerstorfer/cb78e8ca027e82935a8f to your computer and use it in GitHub Desktop.
OCRs an image and adds the extracted text as Spotlight comment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
/** | |
* ocrcomment INPUT | |
* ================ | |
* | |
* > Takes an image, performs OCR and stores the text as Spotlight comment | |
* > BAM! You can search for your images using Spotlight. | |
* | |
* By Florian Eckerstorfer (https://florian.ec, @Florian_) | |
* | |
* Installation | |
* ============ | |
* This script uses Tesseract to perform the OCR; you can install it with Homebrew:
* $ brew install tesseract | |
* | |
* Add execution permissions and move this script into a directory that is in your $PATH
* $ chmod +x ocrcomment | |
* $ mv ocrcomment ~/bin | |
* | |
* Optionally ocrcomment will run the image through "textcleaner" if it is installed. This can | |
* improve the results for many images. | |
* The script is available from: http://www.fmwconcepts.com/imagemagick/textcleaner/index.php | |
* | |
* Usage | |
* ===== | |
* $ ocrcomment img.jpg | |
* | |
* You can check if the Spotlight Comment has been set: | |
* $ xattr -l img.jpg | |
* | |
*/ | |
/**
 * Reports whether an executable with the given name can be found via `which`.
 *
 * @param string $command Bare command name, e.g. "tesseract".
 *
 * @return bool True when `which` exits with status 0 for the command.
 */
function ocrcomment_command_exists($command)
{
    $output = [];
    exec('which ' . escapeshellarg($command) . ' 2>/dev/null', $output, $status);

    return $status === 0;
}

/**
 * Runs an external program and fails loudly when it exits with a non-zero status.
 *
 * @param string   $command   Program to run.
 * @param string[] $arguments Raw arguments; every one is shell-escaped here, so callers
 *                            must NOT quote them themselves.
 *
 * @return void
 *
 * @throws RuntimeException When the program exits with a non-zero status.
 */
function ocrcomment_run($command, array $arguments)
{
    $commandLine = escapeshellarg($command);
    foreach ($arguments as $argument) {
        $commandLine .= ' ' . escapeshellarg($argument);
    }

    // stderr is folded into the captured output so it can be shown in the error message.
    $output = [];
    exec($commandLine . ' 2>&1', $output, $status);

    if ($status !== 0) {
        throw new RuntimeException(
            sprintf("\"%s\" failed with exit code %d:\n%s", $command, $status, implode("\n", $output))
        );
    }
}

/**
 * Makes arbitrary OCR output safe to embed as character data of an XML element.
 *
 * @param string $text Raw text as produced by the OCR engine.
 *
 * @return string Text with XML-illegal control characters removed and `&`, `<`, `>` escaped.
 */
function ocrcomment_xml_escape($text)
{
    // XML 1.0 forbids control characters other than tab, LF and CR (OCR output may
    // contain e.g. form feeds); leaving them in makes the plist unparsable.
    $text = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $text);

    // ENT_SUBSTITUTE: replace invalid UTF-8 instead of returning an empty string.
    return htmlspecialchars($text, ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

/**
 * OCRs the image named in $argv[1] and stores the text as its Spotlight (Finder) comment.
 *
 * Steps: optionally pre-process the image with "textcleaner", run "tesseract", wrap the
 * text in a plist, convert it to binary form with "plutil" and write it to the
 * "com.apple.metadata:kMDItemFinderComment" extended attribute with "xattr".
 * All scratch files live in the system temp directory and are removed afterwards,
 * whether or not the run succeeded.
 *
 * @param string[] $argv Command line arguments; index 1 is the input image path.
 *
 * @return int Process exit code: 0 on success, 1 on any failure.
 */
function ocrcomment_main(array $argv)
{
    if (!isset($argv[1])) {
        fwrite(STDERR, "Usage: ocrcomment INPUT_IMAGE\n");

        return 1;
    }

    if (!ocrcomment_command_exists('tesseract')) {
        fwrite(STDERR, "Please install \"tesseract\".\n");

        return 1;
    }

    $fileName = $argv[1];
    if (!is_file($fileName) || !is_readable($fileName)) {
        fwrite(STDERR, "Cannot read \"$fileName\".\n");

        return 1;
    }

    // escapeshellarg() skips bytes that are not valid characters in the current LC_CTYPE;
    // select a UTF-8 locale (first one available) so non-ASCII file names survive.
    setlocale(LC_CTYPE, 'en_US.UTF-8', 'C.UTF-8', 'UTF-8');

    $tempBase = tempnam(sys_get_temp_dir(), 'ocrcomment');
    if ($tempBase === false) {
        fwrite(STDERR, "Cannot create a temporary file.\n");

        return 1;
    }

    // The cleaned image keeps the input's extension (falling back to "png") but always
    // gets its own path, so "textcleaner" can never overwrite the input image.
    $extension = pathinfo($fileName, PATHINFO_EXTENSION);
    $cleanName = $tempBase . '.clean.' . ($extension !== '' ? $extension : 'png');
    $textName = $tempBase . '.txt'; // tesseract appends ".txt" to the output base it is given
    $plistName = $tempBase . '.plist';

    try {
        // If "textcleaner" is installed execute it; it is optional, so a failure only
        // means that the OCR runs on the untouched image.
        $ocrInput = $fileName;
        if (ocrcomment_command_exists('textcleaner')) {
            try {
                ocrcomment_run('textcleaner', [$fileName, $cleanName]);
                $ocrInput = $cleanName;
            } catch (RuntimeException $e) {
                fwrite(STDERR, "\"textcleaner\" failed, using the original image.\n");
            }
        }

        // Perform OCR
        ocrcomment_run('tesseract', [$ocrInput, $tempBase]);

        $text = is_file($textName) ? file_get_contents($textName) : false;
        if ($text === false) {
            throw new RuntimeException('"tesseract" did not produce any text output.');
        }

        // Spotlight Comments must be a plist in binary form
        // → http://stackoverflow.com/a/8555633/776654
        $plist = '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
            . '<plist version="1.0"><string>' . ocrcomment_xml_escape($text) . '</string></plist>';
        if (file_put_contents($plistName, $plist) === false) {
            throw new RuntimeException("Cannot write \"$plistName\".");
        }
        ocrcomment_run('plutil', ['-convert', 'binary1', $plistName]);

        $binaryPlist = file_get_contents($plistName);
        if ($binaryPlist === false) {
            throw new RuntimeException("Cannot read \"$plistName\".");
        }

        // Set Spotlight Comment; "xattr -x" takes the value as a hex string.
        ocrcomment_run(
            'xattr',
            ['-wx', 'com.apple.metadata:kMDItemFinderComment', bin2hex($binaryPlist), $fileName]
        );
    } catch (RuntimeException $e) {
        fwrite(STDERR, $e->getMessage() . "\n");

        return 1;
    } finally {
        // Cleaning up
        foreach ([$tempBase, $cleanName, $textName, $plistName] as $temporaryFile) {
            if (is_file($temporaryFile)) {
                unlink($temporaryFile);
            }
        }
    }

    return 0;
}

exit(ocrcomment_main(isset($_SERVER['argv']) ? $_SERVER['argv'] : []));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment