Created
November 30, 2019 02:34
-
-
Save knight76/e1a11e9e05391a78c5cf36f42d1f7053 to your computer and use it in GitHub Desktop.
Coursera-Python-Project-pillow-tesseract-opencv(3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import zipfile | |
from PIL import Image, ImageOps, ImageDraw | |
import pytesseract | |
import cv2 as cv | |
import numpy as np | |
face_cascade = cv.CascadeClassifier('readonly/haarcascade_frontalface_default.xml') | |
parsed_img_src = {} | |
with zipfile.ZipFile('readonly/images.zip', 'r') as archive: | |
for entry in archive.infolist(): | |
with archive.open(entry) as file: | |
img = Image.open(file).convert('RGB') | |
parsed_img_src[entry.filename] = {'pil_img':img} | |
for img_name in parsed_img_src.keys(): | |
open_cv_image = np.array(parsed_img_src[img_name]['pil_img']) | |
img_g = cv.cvtColor(open_cv_image, cv.COLOR_BGR2GRAY) | |
faces_bounding_boxes = face_cascade.detectMultiScale(img_g, 1.3, 5) | |
parsed_img_src[img_name]['faces'] = [] | |
for x,y,w,h in faces_bounding_boxes: | |
face = parsed_img_src[img_name]['pil_img'].crop((x,y,x+w,y+h)) | |
parsed_img_src[img_name]['faces'].append(face) | |
for img_name in parsed_img_src.keys(): | |
for face in parsed_img_src[img_name]['faces']: | |
face.thumbnail((100,100),Image.ANTIALIAS) | |
#search the keyword in every page's text and return the faces | |
def search(keyword): | |
for img_name in parsed_img_src: | |
if (keyword in parsed_img_src[img_name]['text']): | |
if(len(parsed_img_src[img_name]['faces']) != 0): | |
print("Result found in file {}".format(img_name)) | |
h = math.ceil(len(parsed_img_src[img_name]['faces'])/5) | |
contact_sheet=Image.new('RGB',(500, 100*h)) | |
xc = 0 | |
yc = 0 | |
for img in parsed_img_src[img_name]['faces']: | |
contact_sheet.paste(img, (xc, yc)) | |
if xc + 100 == contact_sheet.width: | |
xc = 0 | |
yc += 100 | |
else: | |
xc += 100 | |
display(contact_sheet) | |
else: | |
print("Result found in file {} \nBut there were no faces in that file\n\n".format(img_name)) | |
return | |
# search #1 | |
search('Christopher') | |
# search #2 | |
search('Mark') | |
# search #3 | |
search('pizza') |
Author
knight76
commented
Nov 30, 2019
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment