Skip to content

Instantly share code, notes, and snippets.

@khan-hasan
Created February 12, 2019 19:57
Show Gist options
  • Save khan-hasan/3d139b90a8dbf2c3d3986607601fb1e4 to your computer and use it in GitHub Desktop.
Save khan-hasan/3d139b90a8dbf2c3d3986607601fb1e4 to your computer and use it in GitHub Desktop.
import csv
import getpass
import math
import os
import sys
import time
# pip install opencv-python
import cv2 as cv
import numpy as np
# pip install pymssql
import pymssql as ps
def _find_external_contours(binary):
    """Return the external contours of *binary* as a mutable list."""
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on other releases; the contour sequence is always
    # the second-to-last element.
    found = cv.findContours(binary, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    return list(found[-2])


def _nearest_contour(contours, point, min_ratio):
    """Return the index of the acceptable contour closest to *point*.

    A contour is acceptable when its bounding box is at least 10 px on both
    sides and its area fills more than *min_ratio* of that box.  Index 0 is
    returned when nothing is acceptable (the original fallback).
    """
    best_index = 0
    best_dist = sys.maxsize
    for index, contour in enumerate(contours):
        x, y, w, h = cv.boundingRect(contour)
        if w < 10 or h < 10:
            continue
        ratio = cv.contourArea(contour) / (w * h)
        dist = abs(cv.pointPolygonTest(contour, point, True))
        if dist < best_dist and ratio > min_ratio:
            best_dist = dist
            best_index = index
    return best_index


def separate_parts(img, file_name):
    """Crop each numbered part of *img* into its own PNG file.

    The image is eroded, thresholded (inverted) and split into external
    contours.  For every row pending on the module-level ``cursor`` the
    contour nearest to the part-number position is masked out of ``img``,
    cropped to its bounding box, padded with a 50 px white border and written
    to ``out_path`` as ``<file_name>_<part number>.png``.

    Args:
        img: Grayscale image, as handed over by ``process_file``.
        file_name: Base name of the source file; used for log lines and for
            the output file names.

    Uses the module-level ``cursor``, ``error_log`` and ``out_path``.
    """
    WHITE = [255, 255, 255]
    kernel = np.ones((3, 3), np.uint8)

    # Thresholds grayscale image and inverts B/W image
    eroded = cv.erode(img, kernel, iterations=1)
    _, thresh = cv.threshold(eroded, 75, 255, cv.THRESH_BINARY_INV)
    contours = _find_external_contours(thresh)

    if len(contours) == 1:
        # Checks if the detected contours only found the border.
        # If so, remove the border and try again.
        border = contours[0]
        perimeter = cv.arcLength(border, True)
        approx = cv.approxPolyDP(border, 0.02 * perimeter, True)
        if len(approx) == 4:
            x, y, w, h = cv.boundingRect(border)
            cv.rectangle(thresh, (x, y), (x + w, y + h), (0, 0, 0), 6)
            contours = _find_external_contours(thresh)

    # Fetch everything up front: for a SELECT, cursor.rowcount is not
    # meaningful until all rows have been read, so testing it before the loop
    # could not detect "no data".
    rows = cursor.fetchall()
    if not rows:
        error_log.write("No data for {0}\n".format(file_name))

    for row in rows:
        # Processes contours in image, crops each part, and appends filename
        part_number = int(row['grafikhs_bildposnr'])
        new_file_name = file_name + '_' + repr(part_number) + '.png'
        # Offsets are hard-coded; presumably they move from the label's
        # top-left corner to its centre -- TODO confirm.
        x_mid = int(row['grafikhs_topleft_x']) + 9
        y_mid = int(row['grafikhs_topleft_y']) + 11

        nearest = _nearest_contour(contours, (x_mid, y_mid), 0.10)

        # Envelope nearest contour to check its properties
        x, y, w, h = cv.boundingRect(contours[nearest])
        mask = np.zeros(img.shape, dtype="uint8")

        # Check if contour is line, find endpoints' distance to number point
        if w < 10 or h < 10:
            # Plain list deletion; contours have differing lengths, so they
            # cannot be packed into a regular numpy array for np.delete.
            del contours[nearest]
            near_corner = math.hypot(x - x_mid, y - y_mid)
            far_corner = math.hypot(x + w - x_mid, y + h - y_mid)
            # Keep the end of the line that is farther from the number.
            if far_corner > near_corner:
                line_point = (x + w, y + h)
            else:
                line_point = (x, y)
            cv.rectangle(thresh, (x - 1, y - 1),
                         (x + w + 1, y + h + 1), 0, -1)
            # NOTE(review): the previous second pass called the non-existent
            # cv.contour_area, referenced undefined x_start/y_start/x_end/
            # y_end and compared a stale distance.  It now searches for the
            # contour nearest to the line's far end, as the surrounding
            # comments describe -- confirm this is the intended metric.
            nearest = _nearest_contour(contours, line_point, 0.15)
            x, y, w, h = cv.boundingRect(contours[nearest])

        cv.drawContours(mask, contours, nearest, WHITE, -1)

        # Saves final image with margins
        isolated = cv.bitwise_and(img, mask)
        mask = cv.bitwise_not(mask)
        final_image = cv.bitwise_or(isolated, mask)
        final_image = final_image[y:y + h, x:x + w]
        final_image = cv.copyMakeBorder(
            final_image, 50, 50, 50, 50, cv.BORDER_CONSTANT, value=WHITE)
        cv.imwrite(os.path.join(out_path, new_file_name), final_image)
def erase_numbers(img):
    """Paint over every part-number label, and its circle if present, in *img*.

    For each row pending on the module-level ``cursor`` the rectangle given
    by the row's top-left / bottom-right coordinates is filled with white.
    A window of up to 54x54 px centred on that rectangle is then searched
    for a circle; when one is found, a white disc of radius 27 is drawn over
    the rectangle's centre.

    Args:
        img: Grayscale image, as handed over by ``process_file``.  It is
            modified in place.

    Returns:
        The same ``img`` array.
    """
    WHITE = (255, 255, 255)
    RADIUS = 27
    img_height, img_width = img.shape[:2]

    for row in cursor:
        # Start points pulled from database
        x_start = int(row['grafikhs_topleft_x'])
        y_start = int(row['grafikhs_topleft_y'])
        x_end = int(row['grafikhs_bottomright_x'])
        y_end = int(row['grafikhs_bottomright_y'])
        x_mid = int((x_start + x_end) / 2)
        y_mid = int((y_start + y_end) / 2)

        # Cover up number
        cv.rectangle(img, (x_start, y_start), (x_end, y_end), WHITE, -1)

        # Set ROI for finding circles, clamped to the image.  The horizontal
        # bounds are derived from x_mid; they were previously computed from
        # y_mid, which placed the window in the wrong column.
        roi_left = max(x_mid - RADIUS, 0)
        roi_right = min(x_mid + RADIUS, img_width)
        roi_top = max(y_mid - RADIUS, 0)
        roi_bottom = min(y_mid + RADIUS, img_height)
        if roi_left >= roi_right or roi_top >= roi_bottom:
            # The window lies entirely outside the image; nothing to search.
            continue

        working = img[roi_top:roi_bottom, roi_left:roi_right]
        circles = cv.HoughCircles(
            working, cv.HOUGH_GRADIENT, 4, 47, minRadius=18, maxRadius=27
        )
        # If circle is found, cover up circle
        if circles is not None:
            cv.circle(img, (x_mid, y_mid), RADIUS, WHITE, -1)
    return img
# Deprecated. Detects circles on a smoothed copy of the image and paints a
# white disc over each detected centre.
def erase_circles(img):
    """Blank out every circle that the Hough transform detects in *img*.

    The search runs on a copy that has been dilated once, eroded three times
    (2x2 kernel) and median-blurred; the discs (radius 26, white, filled) are
    drawn on ``img`` itself, which is also the return value.
    """
    structuring = np.ones((2, 2), np.uint8)
    grown = cv.dilate(img, structuring, iterations=1)
    shrunk = cv.erode(grown, structuring, iterations=3)
    smoothed = cv.medianBlur(shrunk, 3)
    detected = cv.HoughCircles(
        smoothed, cv.HOUGH_GRADIENT, 2.5, 47, minRadius=20, maxRadius=27
    )
    # The converted copy is never read; the call is retained so the function
    # performs exactly the same operations as before.
    _unused_canvas = cv.cvtColor(img, cv.COLOR_GRAY2BGR)

    if detected is None:
        return img

    for centre_x, centre_y, _radius in np.round(detected[0, :]).astype("int"):
        cv.circle(img, (centre_x, centre_y), 26, (255, 255, 255), -1)
    return img
def process_file(file):
    """Run the full pipeline on one directory entry.

    Loads ``file.path``, converts it to grayscale, erases the part numbers
    and then splits the result into per-part images.  ``file`` must expose
    ``name`` and ``path`` attributes (``main`` passes ``os.scandir`` entries).
    """
    base_name, _extension = os.path.splitext(file.name)
    gray = cv.cvtColor(cv.imread(file.path), cv.COLOR_BGR2GRAY)

    # Both passes read their rows from the shared cursor, so the query is
    # re-issued before each of them.
    reset_cursor(base_name)
    cleaned = erase_numbers(gray)

    reset_cursor(base_name)
    separate_parts(cleaned, base_name)
def reset_cursor(file_name):
    """Set or reset the shared db cursor by re-running the lookup query.

    Selects every row of ``w_grafik_hs`` whose ``grafikhs_grafikid`` equals
    *file_name*; the value is passed as a query parameter.
    """
    query = "select * from [BMWETK].[dbo].[w_grafik_hs] where grafikhs_grafikid = %s"
    cursor.execute(query, file_name)
def main():
    """Process every ``.TIF`` file in ``in_path`` and print a summary.

    Each matching directory entry is passed to ``process_file``.  A failure
    on one file is written to ``error_log`` together with its cause and
    counted, and processing continues with the next file.  Afterwards the
    elapsed time, file count, error count and average time per file are
    printed.

    Uses the module-level ``in_path`` and ``error_log``.
    """
    t_start = time.time()
    file_count = 0
    error_count = 0

    for file in os.scandir(in_path):
        if not file.name.endswith('.TIF'):
            continue
        print(file.name)
        file_count += 1
        try:
            process_file(file)
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C still stops the run;
            # the cause is recorded so failures can be diagnosed afterwards.
            error_log.write(
                "Error with {0}: {1!r}\n".format(file.name, exc))
            error_count += 1

    # Evaluates process time and errors
    elapsed = time.time() - t_start
    # Avoid dividing by zero when the directory holds no .TIF files.
    seconds_per_file = elapsed / file_count if file_count else 0.0
    t_min, t_sec = divmod(round(elapsed), 60)
    t_hour, t_min = divmod(t_min, 60)
    t_total = "{0}H:{1}M:{2}S".format(t_hour, t_min, t_sec)
    print("Processed {0} pictures in about {1} with {2} errors.".format(
        file_count, t_total, error_count))
    print("That's {} seconds per picture!".format(seconds_per_file))
    print("We are all SimplePartners on this blessed day :)")
# Interactive configuration.  These module-level names (in_path, out_path,
# error_log, db, cursor) are read as globals by the functions above.
in_path = input('Enter the input directory: ')
username = "SPOFFICE\\" + input('Username for database: ')
password = getpass.getpass('Password: ')

# Output images and the error log go to an "out" folder inside the input
# directory.
out_path = os.path.join(in_path, 'out')
error_path = os.path.join(out_path, 'error.txt')
os.makedirs(out_path, exist_ok=True)
error_log = open(error_path, 'a')

db = ps.connect(
    server='spcs01',
    user=username,
    password=password,
)
cursor = db.cursor(as_dict=True)

if __name__ == '__main__':
    try:
        main()
    finally:
        # Release the cursor, connection and log file even if main() raises.
        cursor.close()
        db.close()
        error_log.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment