Created
December 8, 2014 23:26
-
-
Save ckirkendall/d76f025f1a205f5dacc9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns ocr-kata.core | |
(:require [clojure.java.io :refer [writer reader resource]])) | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; We treat this OCR parser like a language parser: the input
;; is parsed into an AST that is transformed and tagged by
;; different analysis steps;
;; the AST is then passed to an emitter.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(def numbers
  "Lookup table from a digit's nine cells to the digit it represents.
  Each key lists the three cells of the top row, then the middle row, then
  the bottom row; 1 marks a drawn cell and 0 a blank one."
  (into {}
        (map-indexed
         (fn [digit cells] [cells digit])
         [[0 1 0 1 0 1 1 1 1]     ; 0
          [0 0 0 0 0 1 0 0 1]     ; 1
          [0 1 0 0 1 1 1 1 0]     ; 2
          [0 1 0 0 1 1 0 1 1]     ; 3
          [0 0 0 1 1 1 0 0 1]     ; 4
          [0 1 0 1 1 0 0 1 1]     ; 5
          [0 1 0 1 1 0 1 1 1]     ; 6
          [0 1 0 0 0 1 0 0 1]     ; 7
          [0 1 0 1 1 1 1 1 1]     ; 8
          [0 1 0 1 1 1 0 1 1]]))) ; 9
(defn log-output
  "Debugging aid: prints all arguments on one line via println and returns
  the last argument, so a call can be spliced into an expression."
  [& out]
  (let [result (last out)]
    (apply println out)
    result))
(defn line-to-bin
  "Converts one text line of OCR input into groups of three cells, one group
  per digit column. A space becomes 0 and any other character becomes 1.

  A short final group (for example when trailing spaces were stripped from
  the line) is padded with 0s rather than dropped, so the last digit column
  is not silently lost."
  [line]
  (->> line
       (map #(if (= % \space) 0 1))
       ;; the pad collection only affects an incomplete last group
       (partition 3 3 (repeat 0))))
(defn check-sum
  "Returns true when the weighted sum of the digits in nums is divisible by
  11. The last digit is weighted 1, the one before it 2, and so on up to a
  weight of 9."
  [nums]
  (let [weights  (range 1 10)
        weighted (map * (reverse nums) weights)
        total    (reduce + weighted)]
    (zero? (mod total 11))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; base tagging transforms
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(defn tag-numbers
  "Adds :nums to data: the result of looking up every cell group of :bin in
  the numbers table. Groups with no entry in the table yield nil."
  [data]
  (let [digits (map numbers (:bin data))]
    (assoc data :nums digits)))
(defn tag-check-sum
  "Adds :chksm (the result of check-sum on :nums) to data, unless :ill is
  truthy, in which case data is returned unchanged."
  [data]
  (if (:ill data)
    data
    (assoc data :chksm (check-sum (:nums data)))))
(defn tag-illegible
  "Adds :ill to data: true when any entry of :nums is nil, otherwise nil."
  [data]
  (let [unreadable? (some nil? (:nums data))]
    (assoc data :ill unreadable?)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; Error correction logic and tagging | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(defn flip-bit
  "Returns bin with the cell at idx inverted: a positive value becomes 0 and
  anything else becomes 1."
  [bin idx]
  (let [current (nth bin idx)
        flipped (if-not (pos? current) 1 0)]
    (assoc bin idx flipped)))
(defn chg-perms
  "Returns a lazy sequence of vectors, one per position in bin, each equal to
  bin with exactly that one cell flipped."
  [bin]
  (let [cells (vec bin)]
    (for [position (range (count bin))]
      (flip-bit cells position))))
(defn get-psbl-acct-nums
  "Returns the digit sequences that can be reached from :bin by flipping a
  single cell, keeping only those in which every digit is recognised and
  which pass check-sum."
  [{:keys [bin]}]
  (->> (chg-perms (apply concat bin))
       ;; regroup each flat variant into 9-cell digits and decode them
       (map (fn [cells] (map numbers (partition 9 cells))))
       (remove (fn [digits] (some nil? digits)))
       (filter check-sum)))
(defn tag-err-options
  "Adds :opts to data: the candidate corrections for the scanned number.

  Candidates are only searched for when the number is illegible (:ill) or
  has not passed the checksum (:chksm is falsy). A number that is already
  valid gets an empty :opts, so it is never replaced by a guess and the
  correction search is skipped."
  [{:keys [ill chksm] :as data}]
  ;; :chksm is the key written by tag-check-sum
  (let [needs-correction? (or ill (not chksm))]
    (assoc data :opts (if needs-correction?
                        (get-psbl-acct-nums data)
                        ()))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; formatting output
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(defn format-amb-options
  "Renders the candidate numbers in opts as the AMB status suffix: each
  candidate's digits are concatenated, wrapped in single quotes, separated
  by commas, and enclosed in square brackets."
  [opts]
  (let [rendered (->> opts
                      (map #(apply str %))
                      (interpose "', '")
                      (apply str))]
    (str " AMB ['" rendered "']")))
(defn format-output
  "Produces the output line for one parsed account.

  - exactly one candidate in :opts: that candidate's digits
  - several candidates: the scanned digits followed by the AMB list
  - no candidates and the number is illegible or fails the checksum: the
    scanned digits followed by the ILL marker
  - otherwise: the scanned digits

  Unrecognised digits are shown as a question mark."
  [{:keys [nums ill chksm opts]}]
  (let [digits     (apply str (map #(or % \?) nums))
        candidates (count opts)]
    (cond
      (= 1 candidates)      (apply str (first opts))
      (> candidates 1)      (str digits (format-amb-options opts))
      (and (not ill) chksm) digits
      :else                 (str digits " ILL"))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; core parsing logic | |
;; the input is threaded through a series
;; of transform and analysis steps before
;; being formatted for output
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(defn convert-lines
  "Turns the text lines of one account entry into its formatted output
  string. Only the first three lines are used."
  [lines]
  (let [rows  (map line-to-bin (take 3 lines))
        ;; join the matching group of each row into one cell list per digit
        cells (apply map concat rows)]
    (-> {:bin cells}
        tag-numbers
        tag-illegible
        tag-check-sum
        tag-err-options
        format-output)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; entry points | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(defn load-ocr-file
  "Reads the classpath resource named by file and returns a fully realised
  sequence with one formatted output string per account entry.

  Entries are taken as consecutive groups of four lines. A final group is
  still converted when it has at least three lines, so the last account is
  not lost if the file does not end with the separator line.

  Throws IllegalArgumentException when the resource cannot be found."
  [file]
  (if-let [url (resource file)]
    (with-open [r (reader url)]
      (->> (line-seq r)
           (partition-all 4)
           ;; a trailing group with fewer than three lines is not an entry
           (filter #(>= (count %) 3))
           (map convert-lines)
           ;; realise everything before the reader is closed
           doall))
    (throw (IllegalArgumentException.
            (str "OCR resource not found on classpath: " file)))))
(defn convert-file
  "Converts the OCR resource in-file and writes one result per line to
  out-file. The input is fully converted before the output is opened."
  [in-file out-file]
  (let [results (load-ocr-file in-file)]
    (with-open [out (writer out-file)]
      (doseq [account results]
        (.write out (str account \newline))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment