Last active
September 15, 2023 12:26
-
-
Save deepak786/60adae3fe4eaacf8b98c to your computer and use it in GitHub Desktop.
How to use Tesseract library for OCR.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. Download the tesseract library from the following link named libs.zip. | |
https://www.dropbox.com/s/9fwqz88sck3xlk4/libs.zip?dl=0 | |
2. Extract the zip folder. | |
* If you are using Eclipse, copy all the files and folders from the extracted libs folder to the libs folder in your project. | |
* If you are using Android Studio, copy all the folders from the extracted libs folder to the src/main/jniLibs folder in your project, and copy classes.jar to your project's libs folder. | |
3. Add an image containing text to the download folder on the device's external storage and name it a.png. | |
4. Create a folder named tessdata inside the assets folder in your project. | |
5. Download the file named eng.traineddata from the link below and copy it into the tessdata folder created in step 4. | |
https://www.dropbox.com/s/7xdnfzp8qsy4ll9/eng.traineddata?dl=0 | |
6. Add the permission "android.permission.WRITE_EXTERNAL_STORAGE" to your manifest. | |
7. Copy and paste the code below. | |
try{ | |
bitmap = BitmapFactory.decodeFile(Environment.getExternalStorageDirectory().getAbsolutePath()+"/download/a.png"); | |
// _path = path to the image to be OCRed | |
ExifInterface exif = new ExifInterface(Environment.getExternalStorageDirectory().getAbsolutePath()+"/download/a.png"); | |
int exifOrientation = exif.getAttributeInt(ExifInterface.TAG_ORIENTATION, ExifInterface.ORIENTATION_NORMAL); | |
int rotate = 0; | |
switch (exifOrientation) { | |
case ExifInterface.ORIENTATION_ROTATE_90: | |
rotate = 90; | |
break; | |
case ExifInterface.ORIENTATION_ROTATE_180: | |
rotate = 180; | |
break; | |
case ExifInterface.ORIENTATION_ROTATE_270: | |
rotate = 270; | |
break; | |
} | |
if (rotate != 0) { | |
int w = bitmap.getWidth(); | |
int h = bitmap.getHeight(); | |
// Setting pre rotate | |
Matrix mtx = new Matrix(); | |
mtx.preRotate(rotate); | |
// Rotating Bitmap & convert to ARGB_8888, required by tess | |
bitmap = Bitmap.createBitmap(bitmap, 0, 0, w, h, mtx, false); | |
} | |
bitmap = bitmap.copy(Bitmap.Config.ARGB_8888, true); | |
TessBaseAPI baseApi = new TessBaseAPI(); | |
// tesseract reads language from tesseract folder, create it if not exists. | |
File f = new File(Environment.getExternalStorageDirectory().getAbsolutePath()+"/tesseract/tessdata"); | |
if(!f.exists()){ | |
f.mkdirs(); | |
} | |
// copy the eng lang file from assets folder if not exists. | |
File f1 = new File(Environment.getExternalStorageDirectory().getAbsolutePath()+"/tesseract/tessdata/eng.traineddata"); | |
if(!f1.exists()){ | |
InputStream in = getAssets().open("tessdata/eng.traineddata"); | |
FileOutputStream fout = new FileOutputStream(f1); | |
byte[] buf = new byte[1024]; | |
int len; | |
while ((len = in.read(buf)) > 0) { | |
fout.write(buf, 0, len); | |
} | |
in.close(); | |
fout.close(); | |
} | |
// DATA_PATH = Path to the storage and data path must contain tessdata subdirectory | |
// lang = for which the language data exists, usually "eng" | |
// Eg. baseApi.init("/mnt/sdcard/tesseract/tessdata/eng.traineddata", "eng"); | |
baseApi.init(Environment.getExternalStorageDirectory().getAbsolutePath()+"/tesseract", "eng"); | |
baseApi.setImage(bitmap); | |
String recognizedText = baseApi.getUTF8Text(); | |
baseApi.end(); | |
Toast.makeText(getApplicationContext(), recognizedText, Toast.LENGTH_LONG).show(); | |
System.out.println("Text is>>>>>>>>>>>>>>>>>" + recognizedText); | |
}catch(Exception e){ | |
e.printStackTrace(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Got so much easy way to use OCR...thanx buddy..