Created
May 15, 2012 01:59
-
-
Save christianroman/2698511 to your computer and use it in GitHub Desktop.
Captcha OCR + Tesseract
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Cleans up a captcha image and runs the Tesseract command-line tool on the result.
 *
 * <p>The pipeline is: read {@link #INPUT}, binarize it around 80% of the average
 * blue-channel value, erase thin white runs, erase weakly connected black pixels,
 * write the cleaned image to {@link #OUTPUT}, invoke Tesseract on it and print the
 * first line of the text file Tesseract produced.
 */
public class OCR {

    /** Captcha image to be cleaned. */
    private static final String INPUT = "C:/captcha/ex.png";

    /** Where the cleaned image is written; this file is what Tesseract reads. */
    private static final String OUTPUT = "C:/captcha/captcha-out.png";

    /** Path of the Tesseract executable (note: contains a space). */
    private static final String TESSERACT_BIN = "C:/Program Files/Tesseract-OCR/tesseract.exe";

    /** Output base handed to Tesseract; the result is read back from this path plus ".txt". */
    private static final String TESSERACT_OUTPUT = "C:/captcha/out.txt";

    /** 24-bit RGB values used for comparisons; the alpha byte is masked off when reading. */
    private static final int WHITE = 0x00FFFFFF, BLACK = 0x00000000;

    /** Alpha byte OR-ed into every written pixel so images with an alpha channel stay visible. */
    private static final int OPAQUE = 0xFF000000;

    /**
     * Runs the whole clean-up and recognition pipeline on the hard-coded paths.
     *
     * @param args ignored
     * @throws Exception if the image cannot be read or written, or Tesseract fails
     */
    public static void main(String... args) throws Exception {
        // ImageIO.read(File) manages its own stream, so nothing is left open here.
        BufferedImage image = ImageIO.read(new File(INPUT));
        if (image == null) {
            throw new IllegalArgumentException("Unsupported or unreadable image: " + INPUT);
        }

        binarize(image);
        removeThinWhiteRuns(image);
        removeIsolatedBlackPixels(image);

        ImageIO.write(image, "png", new File(OUTPUT));
        System.out.println("CAPTCHA: " + recognize());
    }

    /**
     * Turns every pixel black or white: a pixel whose blue channel is at most 80% of the
     * image-wide average blue value becomes black, every other pixel becomes white.
     */
    private static void binarize(BufferedImage image) {
        int width = image.getWidth();
        int height = image.getHeight();

        long sum = 0;
        for (int row = 0; row < height; row++) {
            for (int column = 0; column < width; column++) {
                sum += image.getRGB(column, row) & 0x000000FF;
            }
        }
        int average = (int) (sum / ((long) width * height));
        double threshold = average * .80;

        for (int row = 0; row < height; row++) {
            for (int column = 0; column < width; column++) {
                boolean dark = (image.getRGB(column, row) & 0x000000FF) <= threshold;
                paint(image, column, row, dark ? BLACK : WHITE);
            }
        }
    }

    /**
     * Blackens white pixels that sit in a thin white stroke: the pixel is erased when the
     * product of its horizontal and vertical white run lengths is at most 6, or when either
     * run is a single pixel. The image is edited in place, so later pixels see earlier edits.
     */
    private static void removeThinWhiteRuns(BufferedImage image) {
        for (int row = 0; row < image.getHeight(); row++) {
            for (int column = 0; column < image.getWidth(); column++) {
                if ((image.getRGB(column, row) & WHITE) != WHITE) {
                    continue;
                }
                int height = countVerticalWhite(image, column, row);
                int width = countHorizontalWhite(image, column, row);
                if ((width * height <= 6) || (width == 1) || (height == 1)) {
                    paint(image, column, row, BLACK);
                }
            }
        }
    }

    /**
     * Whitens non-white pixels that have at most three non-white neighbours among the
     * eight surrounding pixels. The image is edited in place.
     */
    private static void removeIsolatedBlackPixels(BufferedImage image) {
        for (int row = 0; row < image.getHeight(); row++) {
            for (int column = 0; column < image.getWidth(); column++) {
                if ((image.getRGB(column, row) & WHITE) != WHITE
                        && countBlackNeighbors(image, column, row) <= 3) {
                    paint(image, column, row, WHITE);
                }
            }
        }
    }

    /**
     * Runs Tesseract on {@link #OUTPUT} and returns the first line of its text output.
     *
     * @return the first line of the result file, or an empty string if the file is empty
     * @throws Exception if the process cannot be started, exits with a non-zero code,
     *                   or the result file cannot be read
     */
    private static String recognize() throws Exception {
        // An argument list avoids whitespace tokenization, which would otherwise split
        // the executable path at the space in "Program Files".
        ProcessBuilder builder = new ProcessBuilder(
                TESSERACT_BIN, OUTPUT, TESSERACT_OUTPUT, "nobatch", "letters");
        builder.redirectErrorStream(true);
        Process tesseractProc = builder.start();

        // Consume the merged stdout/stderr so the child can never block on a full pipe.
        drain(tesseractProc.getInputStream());

        int exitCode = tesseractProc.waitFor();
        if (exitCode != 0) {
            throw new IllegalStateException("Tesseract exited with code " + exitCode);
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(TESSERACT_OUTPUT + ".txt"), "UTF-8"));
        try {
            String line = reader.readLine();
            return line == null ? "" : line;
        } finally {
            reader.close();
        }
    }

    /** Reads the stream to its end, discarding the data, then closes it. */
    private static void drain(java.io.InputStream stream) throws Exception {
        try {
            byte[] buffer = new byte[4096];
            while (stream.read(buffer) != -1) {
                // Output is intentionally discarded.
            }
        } finally {
            stream.close();
        }
    }

    /** Writes the given 24-bit colour at (x, y) as a fully opaque pixel. */
    private static void paint(BufferedImage image, int x, int y, int color) {
        image.setRGB(x, y, OPAQUE | color);
    }

    /** Length of the vertical white run through (x, y), counting the pixel itself. */
    private static int countVerticalWhite(BufferedImage image, int x, int y) {
        return (countAboveWhite(image, x, y) + countBelowWhite(image, x, y)) + 1;
    }

    /** Length of the horizontal white run through (x, y), counting the pixel itself. */
    private static int countHorizontalWhite(BufferedImage image, int x, int y) {
        return (countLeftWhite(image, x, y) + countRightWhite(image, x, y)) + 1;
    }

    /** Number of consecutive white pixels directly to the left of (x, y). */
    private static int countLeftWhite(BufferedImage image, int x, int y) {
        return countWhiteRun(image, x, y, -1, 0);
    }

    /** Number of consecutive white pixels directly to the right of (x, y). */
    private static int countRightWhite(BufferedImage image, int x, int y) {
        return countWhiteRun(image, x, y, 1, 0);
    }

    /** Number of consecutive white pixels directly above (x, y). */
    private static int countAboveWhite(BufferedImage image, int x, int y) {
        return countWhiteRun(image, x, y, 0, -1);
    }

    /** Number of consecutive white pixels directly below (x, y). */
    private static int countBelowWhite(BufferedImage image, int x, int y) {
        return countWhiteRun(image, x, y, 0, 1);
    }

    /**
     * Counts consecutive white pixels starting one step away from (x, y) and walking in
     * direction (dx, dy) until a non-white pixel or the image border is reached.
     * The starting pixel itself is not counted.
     */
    private static int countWhiteRun(BufferedImage image, int x, int y, int dx, int dy) {
        int run = 0;
        int cx = x + dx;
        int cy = y + dy;
        while (cx >= 0 && cx < image.getWidth()
                && cy >= 0 && cy < image.getHeight()
                && (image.getRGB(cx, cy) & WHITE) == WHITE) {
            run++;
            cx += dx;
            cy += dy;
        }
        return run;
    }

    /**
     * Counts how many of the eight pixels surrounding (x, y) are not white.
     * Positions outside the image are treated as white and therefore never counted.
     */
    private static int countBlackNeighbors(BufferedImage image, int x, int y) {
        int numBlacks = 0;
        for (int dx = -1; dx <= 1; dx++) {
            for (int dy = -1; dy <= 1; dy++) {
                boolean isCenter = dx == 0 && dy == 0;
                if (!isCenter && pixelColor(image, x + dx, y + dy) != WHITE) {
                    numBlacks++;
                }
            }
        }
        return numBlacks;
    }

    /** Returns the 24-bit RGB value at (x, y), or {@link #WHITE} when (x, y) is off-image. */
    private static int pixelColor(BufferedImage image, int x, int y) {
        if (x >= image.getWidth() || x < 0 || y < 0 || y >= image.getHeight())
            return WHITE;
        return image.getRGB(x, y) & WHITE;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment