Created
April 24, 2017 00:23
-
-
Save cesardeazevedo/91c7e4e3c4aa3acab3cc2fb34d337427 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(import os) | |
(import pytesseract) | |
(import [PIL [Image]]) | |
;; OCR helper: runs pytesseract over every entry of a directory and prints
;; the recognised text.
;;
;; Attributes set by the constructor:
;;   dir    - directory path passed by the caller
;;   images - sorted entry names returned by os.listdir for `dir`
;;   texts  - recognised text of every parsed image, in `images` order
;;   file   - text of the most recently parsed image ("" if none parsed)
(defclass PDFHelper []
  (defn --init-- [self dir]
    "Store `dir`, list its entries, OCR each one and print the results."
    (setv self.dir dir)
    ;; Create the result holders up front so an empty directory does not
    ;; leave them undefined when print-output runs.
    (setv self.texts [])
    (setv self.file "")
    (setv self.images (self.read-dir))
    (self.parse-images)
    (self.print-output))
  (defn read-dir [self]
    "Return the entry names of self.dir in sorted order."
    ;; os.listdir returns names in arbitrary order; sorting keeps the
    ;; printed output deterministic between runs.
    (sorted (.listdir os self.dir)))
  (defn parse-images [self]
    "Run parse-image on every entry of self.images."
    (for [image self.images] (self.parse-image image)))
  (defn parse-image [self image]
    "OCR the entry `image` of self.dir and record its text."
    ;; The with-form closes the image file handle once OCR is done.
    (with [img (.open Image (os.path.join self.dir image))]
      (setv text (.image_to_string pytesseract img)))
    ;; Keep every result; self.file alone only remembers the last one.
    (.append self.texts text)
    (setv self.file text))
  (defn print-output [self]
    "Print the recognised text of every parsed image, in parse order."
    (for [text self.texts] (print text))))
;; Script entry point. `args` mirrors sys.argv, so index 0 is the script
;; name and index 1 is the directory of images to OCR.
(defmain [&rest args]
  (if (< (len args) 2)
    (do
      ;; No directory given: explain the usage instead of failing with an
      ;; IndexError, and return a non-zero exit status.
      (print "usage: <script> <image-directory>")
      1)
    (do
      ;; Constructing PDFHelper performs the whole read/OCR/print run.
      (PDFHelper (get args 1))
      0)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment