Last active
July 1, 2019 06:33
-
-
Save titaneric/6a3e3991521e81e45772559b76c32e3a to your computer and use it in GitHub Desktop.
OCR.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.common.keys import Keys | |
import urllib.request | |
from PIL import Image | |
from selenium.webdriver.support.select import Select | |
from PIL import ImageEnhance | |
import matplotlib.pyplot as plt | |
import cv2 | |
import numpy as np | |
import urllib.request | |
from lxml import etree | |
import os | |
import shutil | |
import sqlite3 | |
# --- Step 1: clean the working folder and fetch a fresh captcha -------------
# (An earlier revision grabbed the image through Selenium instead:)
#img = driver.find_element_by_xpath('/html/body/img')
dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')
# Delete every PNG found directly inside dir_path before starting.
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(dir_path + "\\" + file)
# Disabled: the same cleanup for the data_set sub-folder.
#dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')
#for file in os.listdir(dir_path):
#    if file.endswith('.png'):
#        os.remove(dir_path + "\\" + file)
src = 'https://isdna1.yzu.edu.tw/CnStdSel/SelRandomImage.aspx'
# Download the image; it is written as "captcha.png" relative to the current
# working directory (not necessarily dir_path).
urllib.request.urlretrieve(src, "captcha.png")
#driver.close()
# --- Step 2: upscale, enhance and binarise the captcha with PIL -------------
Im = Image.open('captcha.png')
width, height = Im.size
# The image is enlarged by a factor of 6 in each dimension (the variable and
# file names say "quarter"/"four", but the factor applied here is 6).
quartersizedIm = Im.resize((int(width * 6), int(height * 6)))
quartersizedIm.save('foursized.png')
im = Image.open('foursized.png')
xsize, ysize = im.size
# Raise contrast first, then brightness; the result is saved for inspection.
enhancer = ImageEnhance.Contrast(im)
im = enhancer.enhance(3.0)
enhancer = ImageEnhance.Brightness(im)
im = enhancer.enhance(10.0)
xsize, ysize = im.size
im.save('enhance.png')
pix = im.load()
# Binarise in place: a pixel whose channel 1 is not 255 while channels 2 and 3
# are both 255 becomes opaque black; every other pixel becomes opaque white.
# NOTE(review): reading index 3 and writing 4-tuples assumes the image is in
# a 4-channel (RGBA) mode — confirm for the captcha source.
for y in range(0, ysize):
    for x in range(0, xsize):
        if pix[x , y][1] != 255 and pix[x , y][2] == 255 and pix[x , y][3] == 255:
            pix[x , y] = (0 , 0 , 0 , 255)
        else:
            pix[x , y] = (255 , 255 , 255 , 255)
im.save('new.png')
# --- Step 3: edge extraction and clean-up with OpenCV -----------------------
# NOTE(review): the structuring element is 1x1, so the erode/dilate calls
# below presumably leave the image unchanged — confirm whether a larger
# kernel was intended.
kernel = np.ones((1, 1), np.uint16)
image = cv2.imread('new.png')
erosion = cv2.erode(image, kernel, iterations = 1)
blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
edged = cv2.Canny(blurred, 30, 150)
plt.imshow(edged)
#plt.show()
dilation = cv2.dilate(edged, kernel, iterations = 1)
plt.imshow(dilation)
#plt.show()
cv2.imwrite('process.png', dilation)
# Combine the binarised image with its edge map: invert the binarised image,
# add the edge image to it, then invert the sum back.
img1 = cv2.imread('new.png')
img2 = cv2.imread('process.png')
img1 = cv2.bitwise_not(img1)
final = cv2.add(img1, img2)
#erosion = cv2.erode(final, kernel, iterations = 1)
#blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
final = cv2.bitwise_not(final)
plt.imshow(final)
#plt.show()
#cv2.imwrite('final.png', final)
# Convert to grayscale and apply a morphological opening with a 3x3
# elliptical kernel; the result is what the segmentation step reads back.
gray = cv2.cvtColor(final,cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
res = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
plt.imshow(res)
#plt.show()
cv2.imwrite('perfect.png', res)
def mse(imgA, imgB):
    """Return the mean squared error between two images of equal shape.

    The squared differences are summed over every element (all channels)
    and divided by the number of pixels (rows * columns) of ``imgA``.

    The previous expression ``imgA - imgB ** 2`` squared only ``imgB``
    because ``**`` binds tighter than ``-``; the difference is now squared
    as a whole.
    """
    diff = imgA.astype("float") - imgB.astype("float")
    err = np.sum(diff ** 2)
    err /= float(imgA.shape[0] * imgA.shape[1])
    return err
def findBorder(axis, color, leng, img):
    """Scan lines of *img* starting at index *axis* and return the first hit.

    Parameters
    ----------
    axis : int
        Index of the first line (column or row) to examine.
    color : list
        Three channel values to look for. When it equals ``[0, 0, 0]`` a
        line is a hit as soon as ANY of its pixels has that colour; for any
        other colour a line is a hit only when ALL of its pixels have it.
    leng : int
        Number of pixels examined in each line. It also selects the scan
        direction: when ``leng == img.shape[0]`` columns are scanned
        (``axis`` is an x index), otherwise rows are scanned.
        NOTE(review): for a square image this heuristic always picks the
        column scan, even when the caller passes the width.
    img : numpy.ndarray
        Image with at least three channels; only the first three are
        compared.

    Returns
    -------
    int
        Index of the first matching line at or after *axis*.

    Raises
    ------
    IndexError
        If no line matches before the edge of the image. (Previously the
        same exception type surfaced incidentally from an out-of-range
        pixel access.)
    """
    scanColumns = leng == img.shape[0]
    limit = img.shape[1] if scanColumns else img.shape[0]
    target = np.asarray(color[:3])
    # Text colour: one matching pixel is enough. Background colour: the whole
    # line has to match.
    anyPixelIsEnough = color == [0, 0, 0]

    while axis < limit:
        if scanColumns:
            line = img[:leng, axis, :3]
        else:
            line = img[axis, :leng, :3]
        hits = np.all(line == target, axis=-1)
        found = hits.any() if anyPixelIsEnough else hits.all()
        if found:
            return axis
        axis += 1

    raise IndexError(
        "findBorder: no line matching colour {0} before index {1}".format(color, limit))
def splitWord(img, currentCol):
    """Cut the next character region out of *img*.

    The horizontal search starts one column to the right of *currentCol*;
    the vertical search always starts at row 0 and looks at full image rows.

    Returns a tuple ``(charImg, rightSide)`` where ``charImg`` is the slice
    ``img[top:bottom, left:right]`` and ``rightSide`` is the column at which
    the horizontal search stopped.
    """
    rows, cols = img.shape[:2]
    textColor = [0, 0, 0]
    backgroundColor = [255, 255, 255]

    # Horizontal extent: first column containing text, then the first column
    # after it that is entirely background.
    leftSide = findBorder(currentCol + 1, textColor, rows, img)
    rightSide = findBorder(leftSide, backgroundColor, rows, img)

    # Vertical extent: same idea, scanning rows from the top of the image.
    topSide = findBorder(0, textColor, cols, img)
    bottomSide = findBorder(topSide, backgroundColor, cols, img)

    return img[topSide:bottomSide, leftSide:rightSide], rightSide
def rotateImage(image, angle):
    """Rotate *image* by *angle* degrees about its centre.

    The output keeps the input's width and height; areas uncovered by the
    rotation are filled with the constant colour (255, 255, 255).
    """
    height, width = image.shape[:2]
    pivot = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        image,
        matrix,
        (width, height),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )
def makeInnerBorder(img, left, right, top, bottom, color):
    """Paint a border of *color* inside the edges of *img*, in place.

    Parameters
    ----------
    img : numpy.ndarray
        Image to modify; it is changed in place and also returned.
    left, right, top, bottom : int
        Border thickness in pixels on each side. Zero or negative values
        paint nothing on that side.
    color : sequence
        Channel values written to every border pixel.

    The previous loops for ``right`` and ``bottom`` iterated over
    ``range(right)`` / ``range(bottom)`` and therefore repainted the left
    and top strips; the right and bottom edges were never touched. They are
    now painted on the correct sides.
    """
    height, width = img.shape[:2]
    left, right = max(left, 0), max(right, 0)
    top, bottom = max(top, 0), max(bottom, 0)

    img[:, :left] = color
    # Explicit start index: a "-right:" slice would select the whole row
    # when right == 0.
    img[:, max(width - right, 0):] = color
    img[:top, :] = color
    img[max(height - bottom, 0):, :] = color
    return img
def verticalProjection(img, minCol=45):
    """Return the best column at which to cut *img* vertically.

    For every column the number of pure-black pixels (first three channels
    all zero) is counted. Among the columns with index >= *minCol*, the one
    with the fewest black pixels is returned; ties go to the leftmost.

    Parameters
    ----------
    img : numpy.ndarray
        Image with at least three channels.
    minCol : int, optional
        Smallest column index that may be returned (default 45, the value
        that used to be hard-coded).

    Returns
    -------
    int
        The chosen column, or ``width - 1`` when the image has no column at
        or beyond *minCol*. The earlier pop-and-retry loop raised
        ``KeyError`` in that situation because it kept popping from an
        emptied dictionary.
    """
    width = img.shape[1]
    if width <= minCol:
        return width - 1

    blackMask = np.all(img[:, :, :3] == 0, axis=2)
    blackPerColumn = blackMask.sum(axis=0)
    # argmin returns the first (leftmost) minimum, matching the old
    # "smallest key among the minima" behaviour.
    return minCol + int(np.argmin(blackPerColumn[minCol:]))
def findRectangleBorder(img):
    """Crop *img* to the bounding rectangle of its pure-black pixels.

    A pixel counts as black when its first three channels are all zero.

    The former implementation scanned the image four times with nested
    loops whose ``break`` only left the inner loop, so each "border"
    variable ended up holding the opposite extreme and the final slice
    swapped them back. As a side effect the last black row and column were
    cut off and rows/columns 0 and 1 were ignored. The bounding box is now
    computed directly and includes every black pixel.

    Returns
    -------
    numpy.ndarray
        A view on *img* restricted to the bounding rectangle. When the
        image contains no black pixel an empty (0 x 0) crop is returned, as
        before.
    """
    blackMask = np.all(img[:, :, :3] == 0, axis=2)
    rowsWithBlack = np.flatnonzero(blackMask.any(axis=1))
    colsWithBlack = np.flatnonzero(blackMask.any(axis=0))

    if rowsWithBlack.size == 0:
        return img[0:0, 0:0]

    top, bottom = rowsWithBlack[0], rowsWithBlack[-1]
    left, right = colsWithBlack[0], colsWithBlack[-1]
    return img[top:bottom + 1, left:right + 1]
def elementaryRevise(img):
    """Normalise a character image.

    The image is cropped with ``findRectangleBorder``, surrounded by a
    5-pixel constant border of colour (255, 255, 255), and resized with the
    size argument ``(60, 80)``.
    """
    cropped = findRectangleBorder(img)
    padded = cv2.copyMakeBorder(
        cropped, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return cv2.resize(padded, (60, 80))
def countRate(file, threshold=0.05):
    """Delete the image *file* when it contains too little black.

    Parameters
    ----------
    file : str
        Path of the image, as accepted by ``cv2.imread``.
    threshold : float, optional
        The file is removed when ``black_pixels / img.size`` is below this
        value (default 0.05, the value that used to be hard-coded).

    NOTE(review): ``img.size`` is the total number of array elements
    (rows * columns * channels), not the number of pixels, so for a
    3-channel image the ratio is one third of the black-pixel fraction.
    That denominator is kept so the set of deleted files does not change —
    confirm whether a per-pixel ratio was intended.

    If the file cannot be read as an image (``cv2.imread`` returns
    ``None``) nothing is done; previously this raised ``AttributeError``.
    """
    img = cv2.imread(file)
    if img is None:
        return

    blackPixels = int(np.count_nonzero(np.all(img == 0, axis=2)))
    if (blackPixels / img.size) < threshold:
        os.remove(file)
def segmentation(img):
    """Split the pre-processed captcha *img* into character images.

    Each piece is written to the current working directory as
    ``<n>.png`` with ``n`` counting up from 0. Pieces narrower than 200
    pixels (after 50 px of padding on every side) are saved directly;
    wider pieces are rotated, cut at the column chosen by
    ``verticalProjection`` and processed part by part. Intermediate
    ``rotated_*`` PNG files are written as well.

    Nothing is returned.
    """
    cntChar = 0
    currentCol = 0
    # Locate where the text ends on the right: rotate the image by 180
    # degrees, look for the first column containing black, and convert that
    # index back to a column of the unrotated image (with 2 px of slack).
    rotateImg = rotateImage(img, 180)
    #cv2.imwrite('rotate.png', rotateImg)
    rotateImg = makeInnerBorder(rotateImg, 1, 1, 1, 1, [255, 255, 255])
    right = findBorder(0, [0,0,0], rotateImg.shape[0], rotateImg)
    right = img.shape[1] - right + 2
    #print(right)
    # Keep cutting until the scan position is within 2 columns of `right`.
    while abs(currentCol - right) > 2:
        #print(currentCol)
        image, currentCol = splitWord(img, currentCol)
        image = cv2.copyMakeBorder(image, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
        #image = elementaryRevise(image)
        if image.shape[1] < 200:
            # Narrow piece: treated as a single character.
            image = elementaryRevise(image)
            cv2.imwrite('{cntChar}.png'.format(**locals()), image)
            cntChar += 1
        else:
            # Wide piece: presumably several touching characters. Try
            # rotations of 1..20 degrees and record, for each angle, the cut
            # column reported by verticalProjection.
            cntDict = dict()
            for i in range(1, 21):
                rotateChar = rotateImage(image, i)
                rotateChar = findRectangleBorder(rotateChar)
                #cv2.imwrite('revised{i}.png'.format(**locals()), rotateChar)
                cntDict[i] = verticalProjection(rotateChar)
            # NOTE(review): cntDict holds column indices, so this picks the
            # angle with the smallest cut column rather than the smallest
            # black-pixel count — confirm this is intended.
            bestAngle = min(cntDict, key = cntDict.get)
            rotatedImg = rotateImage(image, bestAngle)
            rotatedImg = findRectangleBorder(rotatedImg)
            min_cut = cntDict[bestAngle]
            # Saved to disk so an unmodified copy can be re-read below;
            # cv2.rectangle draws on the array it is given.
            cv2.imwrite('rotated_{bestAngle}_degree.png'.format(**locals()), rotatedImg)
            # Left part: paint everything right of the cut with
            # (255, 255, 255), rotate back, pad, normalise and save.
            new_cut = rotatedImg
            cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)
            #new_cut = rotatedImg[0:rotatedImg.shape[0], 0:min_cut]
            new_cut = rotateImage(new_cut, 360 - bestAngle)
            new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            new_cut = elementaryRevise(new_cut)
            cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
            cntChar += 1
            # Right part: reload the saved copy and paint everything left of
            # the cut with (255, 255, 255).
            rotatedImg = cv2.imread('rotated_{bestAngle}_degree.png'.format(**locals()))
            remainImg = rotatedImg
            cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
            #remainImg = rotatedImg[0:rotatedImg.shape[0], (min_cut + 1):rotatedImg.shape[1]]
            #plt.imshow(remainImg)
            #plt.show()
            #the real angle to rotate back need to estimate
            remainImg = rotateImage(remainImg, 360 - bestAngle)
            remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            # Deal with the remaining character(s).
            # NOTE(review): a width of exactly 200 satisfies neither the
            # "< 200" test nor the "> 200" loop, so such a remainder is
            # not saved.
            if remainImg.shape[1] < 200:
                remainImg = elementaryRevise(remainImg)
                cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
                cntChar += 1
            while remainImg.shape[1] > 200:
                # Still wide: repeat the cut, reusing bestAngle instead of
                # searching the angles again.
                #cv2.imwrite('remain.png', remainImg)
                #for i in range(1, 21):
                #rotateChar = rotateImage(remainImg, i)
                #rotateChar = findRectangleBorder(rotateChar)
                #cv2.imwrite('revised{i}.png'.format(**locals()), rotateChar)
                rotateChar = rotateImage(remainImg, bestAngle)
                rotateChar = findRectangleBorder(rotateChar)
                cv2.imwrite('rotated_{bestAngle}.png'.format(**locals()), rotateChar)
                cntDict[bestAngle] = verticalProjection(rotateChar)
                #bestAngle = min(cntDict, key = cntDict.get)
                #rotatedImg = rotateImage(image, bestAngle)
                #rotatedImg = findRectangleBorder(rotatedImg)
                min_cut = cntDict[bestAngle]
                new_cut = rotateChar
                cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)
                #new_cut = rotatedImg[0:rotatedImg.shape[0], 0:min_cut]
                new_cut = rotateImage(new_cut, 360 - bestAngle)
                new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
                new_cut = elementaryRevise(new_cut)
                cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
                cntChar += 1
                rotatedImg = cv2.imread('rotated_{bestAngle}.png'.format(**locals()))
                remainImg = rotatedImg
                #plt.imshow(remainImg)
                #plt.show()
                cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
                #remainImg = rotatedImg[0:rotatedImg.shape[0], (min_cut + 1):rotatedImg.shape[1]]
                #plt.imshow(remainImg)
                #plt.show()
                remainImg = rotateImage(remainImg, 360 - bestAngle)
                remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
                if remainImg.shape[1] < 200:
                    remainImg = elementaryRevise(remainImg)
                    cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
                    cntChar += 1
            #print('rotated_{bestAngle}_degree.png'.format(**locals()), bestAngle, min_cut)
    #find the least number of text color of column
    #if cntNum < textColorNum:
    #    textColorNum = cntNum
# --- Step 4: segment the cleaned captcha and collect the character files ----
image = cv2.imread("perfect.png")
# Pad with 30 px of (255, 255, 255) on every side before segmenting.
image = cv2.copyMakeBorder(image, 30, 30, 30, 30,cv2.BORDER_CONSTANT, value = (255, 255, 255))
segmentation(image)
#dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')
# countRate may delete the file, hence the isfile() check before moving it.
# Five-character names such as "0.png" are moved into the data_set folder.
# NOTE(review): countRate and isfile receive the bare file name, so this only
# works when the current working directory is dir_path — confirm.
# NOTE(review): the original indentation was lost; the move is assumed to be
# nested under the '.png' check — confirm.
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        countRate(file)
        if len(file) == 5 and os.path.isfile(file) :
            shutil.move('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\' + file,'C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set\\' + file)
# From here on dir_path refers to the data_set folder.
dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')
def trainData(img):
    """Return the label of the stored sample closest to *img*.

    Every row of table ``Data`` in ``dataset.sqlite`` is read; column 0 is
    treated as PNG bytes and column 1 as the label. Each blob is written to
    ``test.png``, loaded with ``cv2.imread`` and compared with *img* using
    ``mse``. The label of the row with the smallest error is returned.

    Returns
    -------
    The label of the best match, or ``None`` when the table has no rows
    (previously this case raised ``UnboundLocalError``).

    The database connection is now always closed, and the running minimum
    no longer shadows the builtin ``min``.
    """
    conn = sqlite3.connect('dataset.sqlite')
    try:
        cur = conn.cursor()
        cur.execute('SELECT * FROM Data')
        bestErr = float('inf')
        char = None
        for row in cur:
            ablob = row[0]
            # Round-trip through a file because cv2.imread takes a path.
            with open('test.png', 'wb') as output_file:
                output_file.write(ablob)
            dataImg = cv2.imread('test.png')
            err = mse(dataImg, img)
            if err < bestErr:
                bestErr = err
                char = row[1]
        return char
    finally:
        conn.close()
def mse(img1, img2):
    """Mean squared error between two images of identical shape.

    Squared element-wise differences are summed over all channels and
    divided by the pixel count (rows * columns) of ``img1``.
    """
    delta = img1.astype('float') - img2.astype('float')
    pixelCount = float(img1.shape[0] * img1.shape[1])
    return np.sum(delta ** 2) / pixelCount
# --- Step 5: interactive labelling ------------------------------------------
# Show each image in dir_path together with the label predicted by trainData;
# when the user answers 'y' the image bytes and the upper-cased label are
# stored in table Data of dataset.sqlite.
for file in os.listdir(dir_path):
    imgFile = cv2.imread(dir_path + "\\" + file)
    plt.imshow(imgFile)
    plt.show()
    char = trainData(imgFile)
    print(char)
    judge = input()
    # Nothing can be stored without a prediction.
    if judge == 'y' and char is not None:
        # "with" closes the file even if reading fails.
        with open(dir_path + "\\" + file, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute('''
                INSERT INTO Data(img, label)
                VALUES(?, ?)''', (sqlite3.Binary(ablob), char.upper()))
            conn.commit()
        finally:
            # Release the database handle even when the INSERT fails.
            conn.close()
# Every PNG in dir_path is removed once the labelling pass is over.
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(dir_path + "\\" + file)
def productTestData():
    """Label every image in ``dir_path`` by hand and store it.

    Each file is displayed, the user is asked for its label, and the file's
    bytes together with the upper-cased label are inserted into table
    ``Data`` of ``dataset.sqlite``.

    Blank answers skip the file. The previous test
    ``not target.isspace()`` was true for an empty string, so simply
    pressing Enter stored a sample with an empty label. The image file and
    the database connection are now closed even when an error occurs.
    """
    for file in os.listdir(dir_path):
        path = dir_path + "\\" + file
        imgFile = cv2.imread(path)
        plt.imshow(imgFile)
        plt.show()
        target = input("The target is ")
        if not target.strip():
            # Empty or whitespace-only answer: leave this image unlabelled.
            continue
        with open(path, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute('''
                INSERT INTO Data(img, label)
                VALUES(?, ?)''', (sqlite3.Binary(ablob), target.upper()))
            conn.commit()
        finally:
            conn.close()
#sm = browser.find_element_by_name('ibnSubmit').click() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment