fsouza · November 6, 2010 15:15
diff --git a/ocr.py b/ocr.py
 class OCR :
 	"""Recognize digits drawn as 3x3 glyphs of spaces, underscores and pipes."""

 	# Maps each (row, column) cell of a glyph to the set of digits whose
 	# drawing has that cell filled. Cells (0, 0) and (0, 2) are not listed
 	# and are therefore never inspected.
 	contains_set = {
 		(0, 1) : set([0, 2, 3, 5, 6, 7, 8, 9]),
 		(1, 0) : set([0, 4, 5, 6, 8, 9]),
 		(1, 1) : set([2, 3, 4, 5, 6, 8, 9]),
 		(1, 2) : set([0, 1, 2, 3, 4, 7, 8, 9]),
 		(2, 0) : set([0, 2, 6, 8]),
 		(2, 1) : set([0, 2, 3, 5, 6, 8, 9]),
 		(2, 2) : set([0, 1, 3, 4, 5, 6, 7, 8, 9])
 	}

 	def parse_number(self, number):
 		"""Return the digit drawn by ``number``, or ``None`` if none matches.

 		``number`` is a sequence of three rows of three characters each.
 		Only the cells listed in ``contains_set`` are read; a cell holding
 		a space counts as empty and any other character counts as filled.

 		Every listed cell is checked before a result is returned, so a
 		glyph that merely starts out like a digit (for example an all-blank
 		glyph) yields ``None`` instead of being reported as that digit.
 		"""
 		candidates = set(range(10))
 		for (row, col), filled_digits in self.contains_set.items():
 			if number[row][col] == ' ':
 				# Empty cell: drop every digit that needs it filled.
 				candidates -= filled_digits
 			else:
 				# Filled cell: keep only the digits that have it filled.
 				candidates &= filled_digits

 		if len(candidates) == 1:
 			return candidates.pop()
 		return None

 	def parse_account(self, input_str):
 		"""Parse a nine-digit account number from its ASCII-art rendering.

 		``input_str`` must split into exactly four lines: a first line that
 		is ignored, followed by three rows of 27 characters made only of
 		spaces, underscores and pipes. Each 3-column slice of the rows is
 		one digit glyph.

 		Returns the account number as an ``int`` (leading zeros are
 		therefore not preserved).

 		Raises ``WrongInputError`` when the line count is not four, a row is
 		not 27 columns wide, a row contains any other character, or a glyph
 		does not match any digit.
 		"""
 		all_lines = input_str.splitlines()
 		if len(all_lines) != 4:
 			raise WrongInputError

 		lines = all_lines[1:]
 		for line in lines:
 			if len(line) != 27:
 				raise WrongInputError("Wrong columns size: %d." % len(line))

 			if any(char not in ' _|' for char in line):
 				raise WrongInputError("Invalid character found!")

 		digits = []
 		for i in range(0, 27, 3):
 			glyph = [line[i:i+3] for line in lines]
 			digit = self.parse_number(glyph)
 			if digit is None:
 				# Without this check str(None) would end up in the number
 				# and int() would fail with an unrelated ValueError.
 				raise WrongInputError("Unrecognized digit at position %d." % (i // 3))
 			digits.append(str(digit))

 		return int(''.join(digits))

 class WrongInputError(Exception):
 	"""Raised by OCR.parse_account when the input text is rejected."""
diff --git a/test_ocr.py b/test_ocr.py
 import unittest
 from nose.tools import *
 from ocr import *

 class OCRSpec(unittest.TestCase):
 	# Specs for OCR.parse_account and OCR.parse_number.
 	# NOTE(review): the methods are named it_should_*; unittest's default
 	# loader only collects methods whose names start with "test", so these
 	# presumably rely on a runner configured to match this prefix -- confirm.
 	
 	def setUp(self):
 		# Each spec gets its own OCR instance.
 		self.ocr = OCR()
 	
 	# Every row of the input below starts with '$'; the decorator makes the
 	# spec pass only if parse_account raises WrongInputError.
 	@raises(WrongInputError)
 	def it_should_raise_exception_when_input_is_wrong(self):
 		input_str = \
 		"""
$    _  _  _  _  _  _     _ 
$|_||_|| || ||_   |  |  ||_ 
$  | _||_||_||_|  |  |  | _|"""
 		self.ocr.parse_account(input_str)

 	# The middle row contains a '2', which is not a space, underscore or pipe.
 	@raises(WrongInputError)
 	def it_should_only_accept_pipe_spaces_and_underline(self):
 		input_str = \
"""
    _  _  _  _  _  _     _ 
 |_||2|| || ||_   |  |  ||_ 
  | _||_||_||_|  |  |  | _|"""
 		self.ocr.parse_account(input_str)
 		
 	def it_should_return_490067715(self):
 		# The literal starts with a newline, so it splits into four lines;
 		# the first (empty) one is the line parse_account skips.
 		input_str = \
"""
    _  _  _  _  _  _     _ 
 |_||_|| || ||_   |  |  ||_ 
  | _||_||_||_|  |  |  | _|"""
 		assert_equals(self.ocr.parse_account(input_str), 490067715)
 		
 	def it_should_return_490067716(self):
 		# Same drawing as the 490067715 spec except for the last glyph.
 		input_str = \
"""
    _  _  _  _  _  _     _ 
 |_||_|| || ||_   |  |  ||_ 
  | _||_||_||_|  |  |  ||_|"""
 		assert_equals(self.ocr.parse_account(input_str), 490067716)
 		
 		
 	def it_should_return_123456789(self):
 		input_str = \
"""
    _  _     _  _  _  _  _ 
  | _| _||_||_ |_   ||_||_|
  ||_  _|  | _||_|  ||_| _|"""
 		assert_equals(self.ocr.parse_account(input_str), 123456789)
 		
 	def it_should_have_3_lines_and_27_colums_per_line(self):
 		# No assertion here: the spec passes as long as parse_account does
 		# not raise for this input.
 		input_str = \
"""
    _  _  _  _  _  _     _ 
 |_||_|| || ||_   |  |  ||_ 
  | _||_||_||_|  |  |  | _|"""
 		self.ocr.parse_account(input_str)
 		
 	def it_should_return_8(self):
 		# parse_number takes a single glyph as three 3-character rows.
 		input_matrix = [' _ ', '|_|', '|_|']
 		assert_equals(self.ocr.parse_number(input_matrix), 8)
 		
 	def it_should_return_4(self):
 		input_matrix = ['   ', '|_|', '  |']
 		assert_equals(self.ocr.parse_number(input_matrix), 4)
 		
 	def it_should_return_1(self):
 		input_matrix = ['   ', '  |', '  |']
 		assert_equals(self.ocr.parse_number(input_matrix), 1)
	class OCR :
		"""Recognize digits drawn as 3x3 glyphs of spaces, underscores and pipes."""

		# Maps each (row, column) cell of a glyph to the set of digits whose
		# drawing has that cell filled. Cells (0, 0) and (0, 2) are not listed
		# and are therefore never inspected.
		contains_set = {
			(0, 1) : set([0, 2, 3, 5, 6, 7, 8, 9]),
			(1, 0) : set([0, 4, 5, 6, 8, 9]),
			(1, 1) : set([2, 3, 4, 5, 6, 8, 9]),
			(1, 2) : set([0, 1, 2, 3, 4, 7, 8, 9]),
			(2, 0) : set([0, 2, 6, 8]),
			(2, 1) : set([0, 2, 3, 5, 6, 8, 9]),
			(2, 2) : set([0, 1, 3, 4, 5, 6, 7, 8, 9])
		}

		def parse_number(self, number):
			"""Return the digit drawn by ``number``, or ``None`` if none matches.

			``number`` is a sequence of three rows of three characters each.
			Only the cells listed in ``contains_set`` are read; a cell holding
			a space counts as empty and any other character counts as filled.

			Every listed cell is checked before a result is returned, so a
			glyph that merely starts out like a digit (for example an all-blank
			glyph) yields ``None`` instead of being reported as that digit.
			"""
			candidates = set(range(10))
			for (row, col), filled_digits in self.contains_set.items():
				if number[row][col] == ' ':
					# Empty cell: drop every digit that needs it filled.
					candidates -= filled_digits
				else:
					# Filled cell: keep only the digits that have it filled.
					candidates &= filled_digits

			if len(candidates) == 1:
				return candidates.pop()
			return None

		def parse_account(self, input_str):
			"""Parse a nine-digit account number from its ASCII-art rendering.

			``input_str`` must split into exactly four lines: a first line that
			is ignored, followed by three rows of 27 characters made only of
			spaces, underscores and pipes. Each 3-column slice of the rows is
			one digit glyph.

			Returns the account number as an ``int`` (leading zeros are
			therefore not preserved).

			Raises ``WrongInputError`` when the line count is not four, a row is
			not 27 columns wide, a row contains any other character, or a glyph
			does not match any digit.
			"""
			all_lines = input_str.splitlines()
			if len(all_lines) != 4:
				raise WrongInputError

			lines = all_lines[1:]
			for line in lines:
				if len(line) != 27:
					raise WrongInputError("Wrong columns size: %d." % len(line))

				# The allowed characters are a plain space, '_' and '|'; the
				# pipe needs no backslash escape inside a Python string.
				if any(char not in ' _|' for char in line):
					raise WrongInputError("Invalid character found!")

			digits = []
			for i in range(0, 27, 3):
				glyph = [line[i:i+3] for line in lines]
				digit = self.parse_number(glyph)
				if digit is None:
					# Without this check str(None) would end up in the number
					# and int() would fail with an unrelated ValueError.
					raise WrongInputError("Unrecognized digit at position %d." % (i // 3))
				digits.append(str(digit))

			return int(''.join(digits))

	class WrongInputError(Exception):
		"""Raised by OCR.parse_account when the input text is rejected."""
	import unittest
	from nose.tools import *
	from ocr import *

	class OCRSpec(unittest.TestCase):
		# Specs for OCR.parse_account and OCR.parse_number.
		#
		# Account drawings are written as explicit lists of rows so that the
		# significant spaces inside each 27-column row cannot be lost to
		# re-indentation. The leading empty row is the first line that
		# parse_account skips.
		# NOTE(review): the methods are named it_should_*; unittest's default
		# loader only collects methods whose names start with "test", so these
		# presumably rely on a runner configured to match this prefix -- confirm.

		def setUp(self):
			# Each spec gets its own OCR instance.
			self.ocr = OCR()

		@raises(WrongInputError)
		def it_should_raise_exception_when_input_is_wrong(self):
			# Every row carries an extra leading '$'.
			input_str = "\n".join([
				"",
				"$    _  _  _  _  _  _     _ ",
				"$|_||_|| || ||_   |  |  ||_ ",
				"$  | _||_||_||_|  |  |  | _|",
			])
			self.ocr.parse_account(input_str)

		@raises(WrongInputError)
		def it_should_only_accept_pipe_spaces_and_underline(self):
			# The middle row contains a '2'.
			input_str = "\n".join([
				"",
				"    _  _  _  _  _  _     _ ",
				"|_||2|| || ||_   |  |  ||_ ",
				"  | _||_||_||_|  |  |  | _|",
			])
			self.ocr.parse_account(input_str)

		def it_should_return_490067715(self):
			input_str = "\n".join([
				"",
				"    _  _  _  _  _  _     _ ",
				"|_||_|| || ||_   |  |  ||_ ",
				"  | _||_||_||_|  |  |  | _|",
			])
			assert_equals(self.ocr.parse_account(input_str), 490067715)

		def it_should_return_490067716(self):
			input_str = "\n".join([
				"",
				"    _  _  _  _  _  _     _ ",
				"|_||_|| || ||_   |  |  ||_ ",
				"  | _||_||_||_|  |  |  ||_|",
			])
			assert_equals(self.ocr.parse_account(input_str), 490067716)

		def it_should_return_123456789(self):
			input_str = "\n".join([
				"",
				"    _  _     _  _  _  _  _ ",
				"  | _| _||_||_ |_   ||_||_|",
				"  ||_  _|  | _||_|  ||_| _|",
			])
			assert_equals(self.ocr.parse_account(input_str), 123456789)

		def it_should_have_3_lines_and_27_colums_per_line(self):
			# No assertion here: the spec passes as long as parse_account does
			# not raise for this input.
			input_str = "\n".join([
				"",
				"    _  _  _  _  _  _     _ ",
				"|_||_|| || ||_   |  |  ||_ ",
				"  | _||_||_||_|  |  |  | _|",
			])
			self.ocr.parse_account(input_str)

		def it_should_return_8(self):
			# parse_number takes a single glyph as three 3-character rows.
			input_matrix = [' _ ', '|_|', '|_|']
			assert_equals(self.ocr.parse_number(input_matrix), 8)

		def it_should_return_4(self):
			input_matrix = ['   ', '|_|', '  |']
			assert_equals(self.ocr.parse_number(input_matrix), 4)

		def it_should_return_1(self):
			input_matrix = ['   ', '  |', '  |']
			assert_equals(self.ocr.parse_number(input_matrix), 1)