Last active
July 13, 2017 02:47
-
-
Save shuxiang/da6c6c0650bb77f421e7 to your computer and use it in GitHub Desktop.
captcha code hack
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# apt-get install libtesseract3 libtesseract-dev; pip install pytesseract; | |
# Download Google's Tesseract English ("eng") language data, then copy it to /usr/local/share/tessdata: https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
import Image | |
import requests | |
import pytesseract | |
from StringIO import StringIO | |
# Download the captcha and open it as a PIL image directly from memory.
jpg = Image.open(
    StringIO(
        requests.get('http://test.jpg').content
    )
)
# Run Tesseract OCR on the image, then drop every space from the result.
code = pytesseract.image_to_string(jpg)
code = code.replace(' ', '')
#==================================================== | |
# Unless you train Tesseract on your own data, tesseract3 alone is not accurate enough; using OpenCV gives better results.
# training: https://github.com/this-is-ari/python-tesseract-3.02-training | |
# wget https://github.com/RobinDavid/Pytesser/blob/master/pytesser.py | |
##advanced: http://www.robindavid.fr//opencv-tutorial/cracking-basic-captchas-with-opencv.html | |
import pytesser | |
from pytesser import iplimage_to_string | |
import re | |
import cv | |
from cv import fromarray | |
import cv2 | |
import numpy as np | |
from urllib2 import urlopen | |
from cStringIO import StringIO | |
def create_opencv_image_from_stringio(img_stream, cv2_img_flag=0):
    """Decode an encoded image held in a file-like object with OpenCV.

    The stream is rewound to its beginning, read in full, and the raw
    bytes are passed to ``cv2.imdecode`` as a ``uint8`` array.

    :param img_stream: seekable, readable object holding the encoded
        image bytes (e.g. a ``StringIO``).
    :param cv2_img_flag: read flag forwarded to ``cv2.imdecode``
        (defaults to 0).
    :return: the value returned by ``cv2.imdecode`` for the buffer.
    """
    # Rewind first so the whole payload is read regardless of where the
    # caller left the stream position.
    img_stream.seek(0)
    raw_bytes = img_stream.read()
    encoded = np.asarray(bytearray(raw_bytes), dtype=np.uint8)
    decoded = cv2.imdecode(encoded, cv2_img_flag)
    return decoded
def create_opencv_image_from_url(url, cv2_img_flag=0):
    """Download an encoded image from *url* and decode it with OpenCV.

    :param url: address passed to ``urlopen``; the response body is
        expected to contain the encoded image bytes.
    :param cv2_img_flag: read flag forwarded to ``cv2.imdecode``
        (defaults to 0).
    :return: the value returned by ``cv2.imdecode`` for the downloaded
        bytes.
    :raises: whatever ``urlopen`` / ``read`` raise on network failure.
    """
    from contextlib import closing

    # The original never closed the response, leaking the connection.
    # closing() releases it even if read() raises, and works whether or
    # not the response object supports the with-statement itself.
    with closing(urlopen(url)) as response:
        payload = response.read()
    img_array = np.asarray(bytearray(payload), dtype=np.uint8)
    return cv2.imdecode(img_array, cv2_img_flag)
# Fetch the captcha bytes, decode them with OpenCV (using the helper's
# default read flag), and convert the result with cv.fromarray.
image = cv.fromarray(
    create_opencv_image_from_stringio(
        StringIO(requests.get('http://test.jpg').content)
    )
)
# OCR the converted image through pytesser and print the recognized text.
print(iplimage_to_string(image))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment