Last active
April 7, 2019 15:09
-
-
Save liberize/79f9bfb5a7e767b4b756ff1c7dc04eaa to your computer and use it in GitHub Desktop.
简单验证码识别
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import os | |
import sys | |
from PIL import Image, ImageChops | |
templates = [] | |
def generate_templates(path):
    """Cut the four digit cells out of a captcha image and save each as a PNG.

    The image is binarised on its green channel (values below 10 become
    black, everything else white).  Four cells, 6 pixels wide and 10 pixels
    tall, spaced 7 pixels apart and starting at (5, 5), are then written as
    ``<path>_0.png`` ... ``<path>_3.png``.

    :param path: path of the captcha image to slice.
    :return: None.
    """
    # Convert to RGB before taking the green band: unpacking exactly four
    # bands only works for RGBA input and raises ValueError for three-band
    # images such as ordinary JPEGs.  The context manager closes the file.
    with Image.open(path) as img:
        green = img.convert('RGB').split()[1]
    bw = green.point(lambda x: 0 if x < 10 else 255, '1')
    for i in range(4):
        left = 5 + i * 7
        bw.crop((left, 5, left + 6, 15)).save('{}_{}.png'.format(path, i))
def read_templates():
    """Open ``0.png`` ... ``9.png`` and append them to ``templates``.

    Files are opened relative to the current working directory, in digit
    order, and appended to the module-level ``templates`` list.
    """
    names = ['{}.png'.format(digit) for digit in range(10)]
    for name in names:
        templates.append(Image.open(name))
def recognise(path):
    """Return the four digits of a captcha image as a string.

    The image is binarised on its green channel (values below 10 become
    black, everything else white), cut into four cells of 6x10 pixels and
    each cell is compared against ``templates[0]`` ... ``templates[9]``.
    The index of the template with the smallest pixel difference is taken
    as the digit for that cell.

    :param path: path of the captcha image to read.
    :return: a 4-character string of decimal digits.
    :raises IndexError: if fewer than ten templates have been loaded.
    """
    # Convert to RGB before taking the green band: unpacking exactly four
    # bands only works for RGBA input and raises ValueError for three-band
    # images such as ordinary JPEGs.  The context manager closes the file.
    with Image.open(path) as img:
        green = img.convert('RGB').split()[1]
    bw = green.point(lambda x: 0 if x < 10 else 255, '1')
    digits = []
    for i in range(4):
        left = 5 + i * 7
        cell = bw.crop((left, 5, left + 6, 15))
        # The raw sum of the difference image is enough to rank templates;
        # dividing every score by 255 would only rescale them.
        diff = [sum(ImageChops.difference(cell, templates[j]).getdata())
                for j in range(10)]
        digits.append(str(diff.index(min(diff))))
    return ''.join(digits)
# Load the digit templates once, then recognise every .jpg file found under
# the current directory tree and print one result line per file.
read_templates()
for subdir, dirs, files in os.walk('.'):
    for f in files:
        if not f.endswith('.jpg'):
            continue
        f = os.path.join(subdir, f)
        num = os.path.basename(f)
        result = recognise(f)
        # Parenthesised single-argument form prints the same text on both
        # Python 2 and Python 3 (the bare print statement is Python 2 only).
        print('file: {}, result: {}'.format(num, result))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
首先取若干图片样本放到脚本目录,
注释掉 read_templates() 这一行调用，并将 result = recognise(f) 改为 result = generate_templates(f)
运行脚本得到分割成的数字
人工识别,并重命名,得到 0.png, 1.png, ..., 9.png 共 10 个模板文件
然后将脚本改回去,跑一遍就可以把当前目录所有图片文件识别出来了