Skip to content

Instantly share code, notes, and snippets.

@shuxiang
Last active July 13, 2017 02:47
Show Gist options
  • Save shuxiang/da6c6c0650bb77f421e7 to your computer and use it in GitHub Desktop.
Save shuxiang/da6c6c0650bb77f421e7 to your computer and use it in GitHub Desktop.
captcha code hack
# apt-get install libtesseract3 libtesseract-dev; pip install pytesseract;
# download google tesseract eng data then copy it to /usr/local/share/tessdata: https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
import Image
import requests
import pytesseract
from StringIO import StringIO
# Download the captcha and wrap the raw bytes in a file-like object so PIL can open it.
captcha_bytes = requests.get('http://test.jpg').content
jpg = Image.open(StringIO(captcha_bytes))
# Run OCR on the image, then remove the spaces from the recognised text.
recognised_text = pytesseract.image_to_string(jpg)
code = recognised_text.replace(' ', '')
#====================================================
# Unless you train tesseract 3 on your own data, it is not accurate enough; using OpenCV is better.
# training: https://github.com/this-is-ari/python-tesseract-3.02-training
# wget https://raw.githubusercontent.com/RobinDavid/Pytesser/master/pytesser.py
##advanced: http://www.robindavid.fr//opencv-tutorial/cracking-basic-captchas-with-opencv.html
import pytesser
from pytesser import iplimage_to_string
import re
import cv
from cv import fromarray
import cv2
import numpy as np
from urllib2 import urlopen
from cStringIO import StringIO
def create_opencv_image_from_stringio(img_stream, cv2_img_flag=0):
    """Decode an encoded image held in a file-like object with OpenCV.

    Args:
        img_stream: Seekable file-like object containing the encoded image
            bytes (for example a StringIO wrapping a downloaded JPEG).
        cv2_img_flag: Read flag forwarded unchanged to ``cv2.imdecode``.
            The default ``0`` corresponds to OpenCV's grayscale read flag.

    Returns:
        The result of ``cv2.imdecode`` for the stream's bytes. Per the
        OpenCV documentation this is ``None`` when the data cannot be
        decoded, so callers should check before using it.
    """
    # Rewind so the whole stream is read even if it was already consumed.
    img_stream.seek(0)
    # imdecode expects a flat uint8 buffer of the encoded bytes.
    img_array = np.asarray(bytearray(img_stream.read()), dtype=np.uint8)
    return cv2.imdecode(img_array, cv2_img_flag)
def create_opencv_image_from_url(url, cv2_img_flag=0):
    """Download an encoded image from *url* and decode it with OpenCV.

    Args:
        url: Address passed to ``urlopen`` to fetch the image bytes.
        cv2_img_flag: Read flag forwarded unchanged to ``cv2.imdecode``.
            The default ``0`` corresponds to OpenCV's grayscale read flag.

    Returns:
        The result of ``cv2.imdecode`` for the downloaded bytes. Per the
        OpenCV documentation this is ``None`` when the data cannot be
        decoded, so callers should check before using it.
    """
    request = urlopen(url)
    try:
        raw_bytes = request.read()
    finally:
        # Close the response explicitly so the connection is released even
        # when the read fails; the original never closed it.
        request.close()
    # imdecode expects a flat uint8 buffer of the encoded bytes.
    img_array = np.asarray(bytearray(raw_bytes), dtype=np.uint8)
    return cv2.imdecode(img_array, cv2_img_flag)
# Fetch the captcha bytes and decode them with the default read flag (0).
captcha_stream = StringIO(requests.get('http://test.jpg').content)
image = create_opencv_image_from_stringio(captcha_stream)
# Convert the decoded array with the legacy cv API before passing it to pytesser.
image = cv.fromarray(image)
print(iplimage_to_string(image))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment