Created
          March 2, 2021 17:34 
        
      - 
      
 - 
        
Save Soumi7/f0d06804db2f3725387289f640b16598 to your computer and use it in GitHub Desktop.  
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from flask import Flask, request, jsonify, render_template, url_for, make_response | |
| from werkzeug.utils import secure_filename | |
| import os | |
| import json | |
| import numpy as np | |
| app = Flask(__name__) | |
| app.config['UPLOAD_FOLDER'] = "./imgdir" | |
| import numpy as np | |
| import cv2 | |
| import pandas as pd | |
| import pytesseract | |
| import io | |
| from PIL import Image | |
@app.route('/')
def home():
    """Render and return the ``index.html`` template."""
    page = render_template('index.html')
    return page
# Contours whose bounding box is at least this large are not treated as cells.
_MAX_CELL_WIDTH = 1000
_MAX_CELL_HEIGHT = 500


class _TableNotFound(ValueError):
    """Raised when no table cell can be located in an image."""


def _decode_grayscale(upload):
    """Decode an uploaded file object into a single-channel image.

    Args:
        upload: object exposing ``read()`` that returns the raw image bytes
            (here: the entry of ``request.files``).

    Returns:
        The decoded grayscale ``numpy`` array, or ``None`` when the upload is
        empty or ``cv2.imdecode`` cannot decode it.
    """
    raw = upload.read()
    if not raw:
        return None
    # frombuffer works on the bytes directly; np.fromfile would need a real
    # file descriptor behind the upload stream.
    return cv2.imdecode(np.frombuffer(raw, np.uint8), cv2.IMREAD_GRAYSCALE)


def _locate_cells(img):
    """Find candidate table cells in a grayscale image.

    Returns:
        ``(ocr_source, boxes, mean_height)`` where ``ocr_source`` is the image
        with the detected grid lines combined out (xor + invert), ``boxes`` is
        a list of ``[x, y, w, h]`` sorted by ``y`` and filtered by the size
        limits, and ``mean_height`` is the mean height of *all* contour
        bounding boxes (including the filtered-out ones).

    Raises:
        _TableNotFound: if no bounding box passes the size filter.
    """
    _, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
    img_bin = 255 - img_bin
    _, img_bin_otsu = cv2.threshold(
        255 - img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )

    # A zero-length structuring element is invalid, so clamp for images that
    # are narrower than 100 px.
    span = max(1, img.shape[1] // 100)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, span))
    vertical_lines = cv2.erode(img_bin_otsu, vertical_kernel, iterations=3)
    vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=3)

    hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (span, 1))
    horizontal_lines = cv2.erode(img_bin, hor_kernel, iterations=5)
    horizontal_lines = cv2.dilate(horizontal_lines, hor_kernel, iterations=5)

    grid = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
    grid = cv2.erode(
        ~grid, cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)), iterations=3
    )
    _, grid = cv2.threshold(grid, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    ocr_source = cv2.bitwise_not(cv2.bitwise_xor(img, grid))

    # [-2] selects the contour list whether findContours returns two values
    # or three.
    contours = cv2.findContours(grid, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # sorted() is stable, so boxes sharing a y keep their contour order.
    bounding = sorted(
        (cv2.boundingRect(contour) for contour in contours),
        key=lambda rect: rect[1],
    )
    boxes = [
        [x, y, w, h]
        for (x, y, w, h) in bounding
        if w < _MAX_CELL_WIDTH and h < _MAX_CELL_HEIGHT
    ]
    if not boxes:
        raise _TableNotFound("No table cells were detected in the image")

    mean_height = np.mean([rect[3] for rect in bounding])
    return ocr_source, boxes, mean_height


def _group_into_rows(boxes, mean_height):
    """Split y-sorted boxes into rows.

    A box joins the current row while its ``y`` is at most half the mean
    height below the previous box; otherwise it starts a new row. The row
    being built is always flushed at the end, so a final single-box row is
    not lost.
    """
    rows = []
    current = [boxes[0]]
    previous = boxes[0]
    for box in boxes[1:]:
        if box[1] <= previous[1] + mean_height / 2:
            current.append(box)
        else:
            rows.append(current)
            current = [box]
        previous = box
    rows.append(current)
    return rows


def _assign_columns(rows):
    """Place every box of every row into a column slot.

    The row with the most boxes defines the number of columns and the sorted
    horizontal centres of those columns; each box goes to the slot whose
    centre is nearest.

    Returns:
        ``(grid, total_cells)`` where ``grid[r][c]`` is the (possibly empty)
        list of boxes in row ``r``, column ``c``.
    """
    widest = max(rows, key=len)
    total_cells = len(widest)
    center = np.sort(np.array([int(box[0] + box[2] / 2) for box in widest]))

    grid = []
    for row in rows:
        slots = [[] for _ in range(total_cells)]
        for box in row:
            diff = abs(center - (box[0] + box[2] / 4))
            # argmin returns the first minimum, matching list.index(min(...)).
            slots[int(np.argmin(diff))].append(box)
        grid.append(slots)
    return grid, total_cells


def _ocr_cell(ocr_source, cell_boxes, kernel):
    """Return the OCR text for one table cell.

    An empty cell yields ``' '``; otherwise the text of each box is appended,
    each preceded by a single space.
    """
    if not cell_boxes:
        return ' '
    text = ''
    for x, y, w, h in cell_boxes:
        roi = ocr_source[y:y + h, x:x + w]
        border = cv2.copyMakeBorder(
            roi, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=[255, 255]
        )
        resizing = cv2.resize(
            border, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC
        )
        dilation = cv2.dilate(resizing, kernel, iterations=1)
        erosion = cv2.erode(dilation, kernel, iterations=2)
        text = text + " " + pytesseract.image_to_string(erosion)
    return text


def _image_to_dataframe(img):
    """Run the whole table-extraction pipeline on a grayscale image.

    Returns:
        A ``pandas.DataFrame`` with one row per detected table row and one
        column per detected table column, holding the OCR text of each cell.

    Raises:
        _TableNotFound: if no table cell can be located.
    """
    ocr_source, boxes, mean_height = _locate_cells(img)
    rows = _group_into_rows(boxes, mean_height)
    grid, total_cells = _assign_columns(rows)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1))
    cells = [
        _ocr_cell(ocr_source, cell_boxes, kernel)
        for slots in grid
        for cell_boxes in slots
    ]
    return pd.DataFrame(np.array(cells).reshape(len(rows), total_cells))


@app.route('/predict_api', methods=["GET", "POST"])
def list_post():
    """JSON API: extract the table from the uploaded image.

    Reads the upload from ``request.files['file']``.

    Returns:
        On success, a JSON object whose ``result`` member is a JSON *string*
        (the ``orient="split"`` dump of the table after a CSV round trip).
        A JSON ``error`` member with status 400 when the upload cannot be
        decoded or no table is found.
    """
    img = _decode_grayscale(request.files['file'])
    if img is None:
        return jsonify(error="Uploaded file could not be decoded as an image"), 400
    try:
        dataframe = _image_to_dataframe(img)
    except _TableNotFound as exc:
        return jsonify(error=str(exc)), 400

    # Kept so the on-disk copy of the last result is still produced.
    dataframe.to_csv("output.csv")
    # The response is built from an in-memory CSV round trip (same shape as
    # reading the file back) so a concurrent request overwriting output.csv
    # cannot leak into this response.
    round_tripped = pd.read_csv(io.StringIO(dataframe.to_csv()))
    parsed = json.loads(round_tripped.to_json(orient="split"))
    return jsonify(result=json.dumps(parsed, indent=4))


@app.route('/', methods=['POST', 'GET'])
def predict():
    """HTML GUI: extract the table from an uploaded PNG and return it as CSV.

    Reads the upload from ``request.files['file']``.

    Returns:
        A ``text/csv`` attachment named ``table.csv`` on success; the
        ``Wrong_file_type.html`` template when the file name does not end in
        ``png`` (any letter case) or the content cannot be decoded; a plain
        400 response when no table is found.
    """
    file = request.files['file']
    extension = (file.filename or "").rsplit(".", 1)[-1].lower()
    if extension != "png":
        return render_template('Wrong_file_type.html')

    img = _decode_grayscale(file)
    if img is None:
        return render_template('Wrong_file_type.html')
    try:
        dataframe = _image_to_dataframe(img)
    except _TableNotFound as exc:
        return make_response(str(exc), 400)

    resp = make_response(dataframe.to_csv())
    resp.headers["Content-Disposition"] = "attachment; filename=table.csv"
    resp.headers["Content-Type"] = "text/csv"
    return resp
if __name__ == '__main__':
    # Start Flask's built-in server on host 0.0.0.0, port 5000.
    app.run(port=5000, host='0.0.0.0')
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment