Created
November 17, 2024 00:23
-
-
Save kspalaiologos/bcaf5f019eaaf25c50a5beafa18d56a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Feeling overwhelmed by "find the words in a grid" tasks for preschoolers?
// Look no further!
#include <leptonica/allheaders.h> | |
#include <tesseract/baseapi.h> | |
#include <fstream> | |
#include <iostream> | |
#include <sstream> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
// Checks whether `word` can be read starting at grid[row][col] in any of the
// eight straight-line directions (horizontal, vertical and diagonal, each both
// ways).
//
// grid: rows of the letter matrix; rows may differ in length.
// row, col: zero-based start cell; a cell outside the grid yields false.
// word: text to look for; an empty word never matches.
//
// Returns true if at least one direction spells the whole word without
// leaving the grid, false otherwise.
bool search2D(const std::vector<std::string>& grid, int row, int col, const std::string& word) {
    // True when (r, c) addresses an existing character. Each row is checked
    // against its own length so ragged grids are never indexed out of bounds.
    const auto inside = [&grid](int r, int c) {
        return r >= 0 && r < static_cast<int>(grid.size()) &&
               c >= 0 && c < static_cast<int>(grid[r].size());
    };

    if (word.empty() || !inside(row, col) || grid[row][col] != word[0]) return false;

    // Row/column steps for the eight neighbouring directions.
    static const int stepRow[8] = { -1, -1, -1, 0, 0, 1, 1, 1 };
    static const int stepCol[8] = { -1, 0, 1, -1, 1, -1, 0, 1 };
    const int len = static_cast<int>(word.size());

    for (int dir = 0; dir < 8; dir++) {
        int r = row + stepRow[dir];
        int c = col + stepCol[dir];
        int matched = 1;  // word[0] was already matched at the start cell
        while (matched < len && inside(r, c) && grid[r][c] == word[matched]) {
            r += stepRow[dir];
            c += stepCol[dir];
            matched++;
        }
        if (matched == len) return true;
    }
    return false;
}
vector<vector<int>> searchWord(vector<string> grid, string word) { | |
int m = grid.size(); | |
int n = grid[0].size(); | |
vector<vector<int>> ans; | |
for (int i = 0; i < m; i++) | |
for (int j = 0; j < n; j++) | |
if (search2D(grid, i, j, word)) | |
ans.push_back({ i, j }); | |
return ans; | |
} | |
// Prints the grid to standard output, one row per line, wrapping the
// character at (row, col) in the ANSI escape sequences "\033[1;31m" (bold red)
// and "\033[0m" (reset).
//
// grid: rows of the letter matrix; may be empty, and rows may differ in length.
// row, col: zero-based cell to highlight; if no such cell exists nothing is
//           highlighted.
void displayMatrix(const std::vector<std::string>& grid, int row, int col) {
    const int rows = static_cast<int>(grid.size());
    for (int i = 0; i < rows; i++) {
        // Each row is printed over its own length, never the first row's.
        const int cols = static_cast<int>(grid[i].size());
        for (int j = 0; j < cols; j++) {
            if (i == row && j == col) std::cout << "\033[1;31m" << grid[i][j] << "\033[0m";
            else std::cout << grid[i][j];
        }
        std::cout << '\n';
    }
}
int main(int argc, char ** argv) { | |
if (argc < 2) { cerr << "Give one argument." << endl; return 1; } | |
const char * imagePath = argv[1]; | |
tesseract::TessBaseAPI tess; | |
if (tess.Init(NULL, "eng", tesseract::OEM_LSTM_ONLY) != 0) { | |
cerr << "Could not initialize tesseract." << endl; | |
return 1; | |
} | |
Pix * image = pixRead(imagePath); | |
if (!image) { | |
cerr << "Could not open image file: " << imagePath << endl; | |
return 1; | |
} | |
tess.SetImage(image); | |
tess.SetVariable("tessedit_char_blacklist", "0123456789!@#$%^&*()_+-=[]{};:'\"\\|,.<>/?"); | |
tess.SetVariable("load_system_dawg", "0"); | |
tess.SetVariable("load_freq_dawg", "0"); | |
tess.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK); | |
char * ocrResult = tess.GetUTF8Text(); | |
if (!ocrResult) { | |
cerr << "OCR failed." << endl; | |
pixDestroy(&image); | |
tess.End(); | |
return 1; | |
} | |
cout << "OCR Result:\n" << ocrResult << endl; | |
cout << "Due to OCR fuckery kindly please paste the cleaned up word matrix and put the words to search on the last " | |
"line: " | |
<< endl; | |
vector<string> lines; | |
string line; | |
while (getline(cin, line)) | |
lines.push_back(line); | |
vector<string> words_to_search; | |
istringstream iss(lines.back()); | |
string word; | |
while (iss >> word) | |
words_to_search.push_back(word); | |
lines.pop_back(); | |
vector<vector<int>> ans; | |
for (auto word : words_to_search) { | |
ans = searchWord(lines, word); | |
if (ans.size() > 0) { | |
cout << "Word " << word << " found at: "; | |
for (auto ele : ans) { | |
cout << "(" << ele[0] << ", " << ele[1] << ") " << endl; | |
displayMatrix(lines, ele[0], ele[1]); | |
} | |
cout << endl; | |
} else cout << "Word " << word << " not found." << endl; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment