Skip to content

Instantly share code, notes, and snippets.

@kspalaiologos
Created November 17, 2024 00:23
Show Gist options
  • Save kspalaiologos/bcaf5f019eaaf25c50a5beafa18d56a6 to your computer and use it in GitHub Desktop.
Save kspalaiologos/bcaf5f019eaaf25c50a5beafa18d56a6 to your computer and use it in GitHub Desktop.
// Feeling overwhelmed by "find the words in a grid" tasks for preschoolers?
// Look no further!
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
/// Tests whether `word` can be read starting at cell (row, col) of `grid`,
/// walking in a straight line along any of the 8 compass directions.
///
/// @param grid Letter grid, one string per row. Rows may differ in length;
///             every cell access is bounds-checked against its own row.
/// @param row  Row index of the starting cell.
/// @param col  Column index of the starting cell.
/// @param word Word to look for.
/// @return true if (row, col) lies inside the grid and `word` is spelled out
///         from it in at least one direction; false otherwise (including an
///         empty `word` or a start cell outside the grid).
bool search2D(const std::vector<std::string>& grid, int row, int col, const std::string& word) {
    // Bounds check against the length of the row actually being read, so a
    // ragged grid can never be indexed past the end of a short row.
    const auto inside = [&grid](int r, int c) {
        return r >= 0 && r < static_cast<int>(grid.size()) &&
               c >= 0 && c < static_cast<int>(grid[r].size());
    };
    if (word.empty() || !inside(row, col) || grid[row][col] != word[0]) return false;

    // Row/column deltas for the 8 neighbours (N, NE, E, ... in index space).
    static constexpr int kRowStep[8] = { -1, -1, -1, 0, 0, 1, 1, 1 };
    static constexpr int kColStep[8] = { -1, 0, 1, -1, 1, -1, 0, 1 };

    const int len = static_cast<int>(word.size());
    for (int dir = 0; dir < 8; ++dir) {
        int r = row + kRowStep[dir];
        int c = col + kColStep[dir];
        int matched = 1; // word[0] already matched at the start cell
        while (matched < len && inside(r, c) && grid[r][c] == word[matched]) {
            r += kRowStep[dir];
            c += kColStep[dir];
            ++matched;
        }
        if (matched == len) return true;
    }
    return false;
}
vector<vector<int>> searchWord(vector<string> grid, string word) {
int m = grid.size();
int n = grid[0].size();
vector<vector<int>> ans;
for (int i = 0; i < m; i++)
for (int j = 0; j < n; j++)
if (search2D(grid, i, j, word))
ans.push_back({ i, j });
return ans;
}
/// Prints `grid` to standard output, one row per line, with the character at
/// (row, col) wrapped in the ANSI escape sequence for bold red.
///
/// @param grid Letter grid, one string per row; may be empty or ragged.
/// @param row  Row index of the cell to highlight.
/// @param col  Column index of the cell to highlight. If (row, col) does not
///             name an existing cell, nothing is highlighted.
void displayMatrix(const std::vector<std::string>& grid, int row, int col) {
    const int rows = static_cast<int>(grid.size());
    for (int i = 0; i < rows; ++i) {
        // Each row is printed by its own length so ragged grids are neither
        // read out of bounds nor truncated.
        const int cols = static_cast<int>(grid[i].size());
        for (int j = 0; j < cols; ++j) {
            if (i == row && j == col) std::cout << "\033[1;31m" << grid[i][j] << "\033[0m";
            else std::cout << grid[i][j];
        }
        std::cout << '\n';
    }
}
int main(int argc, char ** argv) {
if (argc < 2) { cerr << "Give one argument." << endl; return 1; }
const char * imagePath = argv[1];
tesseract::TessBaseAPI tess;
if (tess.Init(NULL, "eng", tesseract::OEM_LSTM_ONLY) != 0) {
cerr << "Could not initialize tesseract." << endl;
return 1;
}
Pix * image = pixRead(imagePath);
if (!image) {
cerr << "Could not open image file: " << imagePath << endl;
return 1;
}
tess.SetImage(image);
tess.SetVariable("tessedit_char_blacklist", "0123456789!@#$%^&*()_+-=[]{};:'\"\\|,.<>/?");
tess.SetVariable("load_system_dawg", "0");
tess.SetVariable("load_freq_dawg", "0");
tess.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK);
char * ocrResult = tess.GetUTF8Text();
if (!ocrResult) {
cerr << "OCR failed." << endl;
pixDestroy(&image);
tess.End();
return 1;
}
cout << "OCR Result:\n" << ocrResult << endl;
cout << "Due to OCR fuckery kindly please paste the cleaned up word matrix and put the words to search on the last "
"line: "
<< endl;
vector<string> lines;
string line;
while (getline(cin, line))
lines.push_back(line);
vector<string> words_to_search;
istringstream iss(lines.back());
string word;
while (iss >> word)
words_to_search.push_back(word);
lines.pop_back();
vector<vector<int>> ans;
for (auto word : words_to_search) {
ans = searchWord(lines, word);
if (ans.size() > 0) {
cout << "Word " << word << " found at: ";
for (auto ele : ans) {
cout << "(" << ele[0] << ", " << ele[1] << ") " << endl;
displayMatrix(lines, ele[0], ele[1]);
}
cout << endl;
} else cout << "Word " << word << " not found." << endl;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment