Last active
October 3, 2015 14:59
-
-
Save holdenhinkle/4d4e39680c8271f964b3 to your computer and use it in GitHub Desktop.
ocr_v2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'pry' | |
# Converts text drawn with '|' and '_' characters into the digits it depicts.
#
# Layout handled by this class:
# * every glyph occupies a cell of CELL_WIDTH columns by GLYPH_ROWS rows;
# * rows end with "\n" and may have had their trailing blanks removed;
# * glyphs on the same line sit side by side;
# * lines are separated by a blank row ("\n\n") and rendered as ','.
#
# A cell that matches no known glyph is rendered as '?'.
class OCR
  # Number of columns in one glyph cell.
  CELL_WIDTH = 3

  # Number of rows in one glyph cell.
  GLYPH_ROWS = 3

  # Upper bound on the length of a text that is looked up directly as one glyph.
  SINGLE_DIGIT_MAX_LENGTH = 15

  # Glyph (rows right-stripped, each followed by "\n") => character.
  #
  # The right-aligned glyphs 1, 4 and 7 are listed at full cell width, which
  # is the form parse_line produces. Their two-column spellings are listed as
  # well so that text which was recognised before is still recognised.
  GLYPHS = {
    " _\n| |\n|_|\n" => '0',
    "\n  |\n  |\n" => '1',
    "\n |\n |\n" => '1',
    " _\n _|\n|_\n" => '2',
    " _\n _|\n _|\n" => '3',
    "\n|_|\n  |\n" => '4',
    "\n|_|\n |\n" => '4',
    " _\n|_\n _|\n" => '5',
    " _\n|_\n|_|\n" => '6',
    " _\n  |\n  |\n" => '7',
    " _\n |\n |\n" => '7',
    " _\n|_|\n|_|\n" => '8',
    " _\n|_|\n _|\n" => '9',
    "\n" => ','
  }.freeze

  attr_reader :text

  # @param text [String] the drawing to recognise; it is never modified
  def initialize(text)
    @text = text
  end

  # Recognises the whole text.
  #
  # @return [String] the recognised digits, with ',' between lines and '?'
  #   for every cell that is not a known glyph
  def convert
    if single_digit?
      convert_single_digit(text)
    elsif multiple_digits_in_line?
      convert_line(text)
    else
      convert_paragraph(text)
    end
  end

  private

  # True when the text is short enough AND no row is wider than one cell.
  # Length alone is not sufficient: a two-digit line such as "11" is only
  # 15 characters long once its blank top row has been stripped.
  def single_digit?
    text.length <= SINGLE_DIGIT_MAX_LENGTH && widest_row(text) <= CELL_WIDTH
  end

  def multiple_digits_in_line?
    !single_digit? && !text.include?("\n\n")
  end

  def multiple_lines?
    !single_digit? && text.include?("\n\n")
  end

  # Length of the longest row of +str+, 0 when it has no rows.
  def widest_row(str)
    str.split("\n").map(&:length).max.to_i
  end

  # Maps one glyph to its character, '?' when the glyph is unknown.
  def convert_single_digit(glyph)
    GLYPHS.fetch(glyph, '?')
  end

  # Recognises every glyph of one line and concatenates the results.
  def convert_line(line)
    parse_line(line).map { |glyph| convert_single_digit(glyph) }.join
  end

  # Cuts one line into glyph strings in the form used as GLYPHS keys.
  #
  # Rows are padded to a common width before slicing, so a row whose
  # trailing blanks were removed still contributes one (empty) cell per
  # glyph. Only the first GLYPH_ROWS rows are used; missing rows count as
  # blank.
  def parse_line(line)
    return [line] if line == "\n" # separator piece from parse_paragraphs

    source = line.split("\n")
    rows = Array.new(GLYPH_ROWS) { |index| source[index] || '' }
    columns = (rows.map(&:length).max + CELL_WIDTH - 1) / CELL_WIDTH

    cells = rows.map do |row|
      row.ljust(columns * CELL_WIDTH).scan(/.{#{CELL_WIDTH}}/).map(&:rstrip)
    end
    cells.transpose.map { |glyph| glyph.map { |cell| "#{cell}\n" }.join }
  end

  # Recognises every line of a multi-line text; the "\n" pieces produced by
  # parse_paragraphs turn into the ',' between lines.
  def convert_paragraph(para)
    parse_paragraphs(para).map { |line| convert_line(line) }.join
  end

  # Splits +para+ at every "\n\n" into alternating line and "\n" pieces,
  # e.g. "A\n\nB\n" => ["A\n", "\n", "B\n"]. +para+ itself is left untouched,
  # so convert can be called repeatedly and on frozen strings.
  def parse_paragraphs(para)
    # limit -1 keeps a trailing empty field, so a text ending in "\n\n"
    # still yields its final separator piece.
    *lines, last = para.split("\n\n", -1)
    pieces = lines.flat_map { |line| ["#{line}\n", "\n"] }
    pieces << last unless last.nil? || last.empty?
    pieces
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment