Skip to content

Instantly share code, notes, and snippets.

@angeldm
Created January 29, 2013 12:00
Show Gist options
  • Save angeldm/4663739 to your computer and use it in GitHub Desktop.
Save angeldm/4663739 to your computer and use it in GitHub Desktop.
Detecting Font size using Tesseract+Leptonica
/*
http://pastebin.com/0dV84hBa
g++ -o test_font_features test_font_features-2.cpp -ltesseract -llept
*/
#include <cstdio>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
/**
 * OCRs a sample image with Tesseract and prints, for every recognized word,
 * the font attributes Tesseract reports (font name, point size, font id and
 * the bold/italic/underlined/monospace/serif/smallcaps flags).
 *
 * Returns 0 on success and 1 if Tesseract cannot be initialized, the image
 * cannot be read, or recognition fails.
 */
int main() {
  // Automatic storage: the API object is destroyed on every return path
  // (the heap-allocated original was never deleted).
  tesseract::TessBaseAPI api;
  if (api.Init(NULL, "eng")) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    return 1;
  }

  const char* image_path = "/usr/src/tesseract-3.02/phototest.tif";
  Pix* pix = pixRead(image_path);
  if (pix == NULL) {
    // pixRead returns NULL when the file is missing or cannot be decoded.
    fprintf(stderr, "Could not read image %s.\n", image_path);
    api.End();
    return 1;
  }
  api.SetImage(pix);

  // Recognize returns 0 on success.
  if (api.Recognize(0) != 0) {
    fprintf(stderr, "Recognition failed for %s.\n", image_path);
    api.End();
    pixDestroy(&pix);
    return 1;
  }

  tesseract::ResultIterator* ri = api.GetIterator();
  if (ri != NULL) {
    do {
      // The returned buffer is owned by the caller and released with delete[].
      char* word = ri->GetUTF8Text(tesseract::RIL_WORD);
      if (word != NULL) {
        // Initialized up front so the values are well defined even if
        // WordFontAttributes returns early without writing to them.
        bool bold = false;
        bool italic = false;
        bool underlined = false;
        bool monospace = false;
        bool serif = false;
        bool smallcaps = false;
        int pointsize = 0;
        int font_id = -1;
        const char* font_name = ri->WordFontAttributes(
            &bold, &italic, &underlined, &monospace, &serif, &smallcaps,
            &pointsize, &font_id);
        // font_name may be NULL when no font information is available;
        // passing NULL to %s is undefined behaviour, so substitute a label.
        printf("%s \t=> fontname: %s, size: %d, font_id: %d, bold: %d,"
               " italic: %d, underlined: %d, monospace: %d, serif: %d,"
               " smallcap: %d\n",
               word, font_name != NULL ? font_name : "(unknown)", pointsize,
               font_id, bold, italic, underlined, monospace, serif, smallcaps);
        delete[] word;
      }
    } while (ri->Next(tesseract::RIL_WORD));
  }

  // Release the iterator before shutting the engine down.
  delete ri;
  api.End();
  pixDestroy(&pix);
  return 0;
}
@neelkadia-zz
Copy link

Do you have a Python implementation for this?

@bharath-kumarn
Copy link

Do you have a Python implementation for this?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment