Created
November 5, 2022 09:35
-
-
Save dermesser/e1c192b57da573896377a34fcaa13e23 to your computer and use it in GitHub Desktop.
Interview question: how to find the largest "word rectangle" (each word and each column is a word) from a dictionary.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <unordered_set> | |
#include <string> | |
#include <vector> | |
#include <memory> | |
#include <cctype> | |
using namespace std; | |
unordered_set<string> read_dictionary(const string& path) { | |
ifstream dict(path); | |
unordered_set<string> set; | |
while (dict.good()) { | |
string word; | |
dict >> word; | |
for (char& c : word) { | |
c = tolower(c); | |
if (!isalpha(c)) { | |
goto ct; | |
} | |
} | |
if (word.size() > 1) | |
set.insert(word); | |
ct: 0; | |
} | |
return set; | |
} | |
vector<unordered_set<string>> by_length(const unordered_set<string>& dict) { | |
size_t maxlen = 0; | |
for (const string& w : dict) | |
maxlen = max(maxlen, w.size()); | |
vector<unordered_set<string>> by_length(maxlen+1); | |
for (const string& w : dict) { | |
by_length[w.size()].insert(w); | |
} | |
return by_length; | |
} | |
class Trie { | |
struct TrieNode; | |
struct TrieNode { | |
vector<unique_ptr<TrieNode>> children; | |
char c; | |
TrieNode() : children(static_cast<size_t>('z'-'a' + 1)) {} | |
}; | |
unique_ptr<TrieNode> root; | |
public: | |
Trie(const unordered_set<string> words) : root(make_unique<TrieNode>()) { | |
for (const string& w : words) { | |
insert(w); | |
} | |
} | |
bool find(const string& prefix) const { | |
const unique_ptr<TrieNode> *current = &root; | |
for (char c : prefix) { | |
if ((*current)->children[c-'a'] != nullptr) { | |
current = &(*current)->children[c-'a']; | |
} else { | |
return false; | |
} | |
} | |
return true; | |
} | |
void insert(const string& word) { | |
unique_ptr<TrieNode> *current = &root; | |
for (char c : word) { | |
if ((*current)->children[c-'a'] != nullptr) { | |
current = &(*current)->children[c-'a']; | |
} else { | |
(*current)->children[c-'a'] = make_unique<TrieNode>(); | |
current = &(*current)->children[c-'a']; | |
} | |
} | |
} | |
}; | |
class Rectangle { | |
vector<string> rectangle; | |
int cols; | |
const vector<unordered_set<string>>& dict; | |
const Trie& prefixes; | |
public: | |
long words_count = 0; | |
Rectangle(size_t rows, size_t cols, const Trie& prefixes, const vector<unordered_set<string>>& dict) | |
: rectangle(rows), cols(cols), dict(dict), prefixes(prefixes) { | |
} | |
const vector<string>& get() const { return rectangle; } | |
bool check_feasible(int upto_rows) { | |
string prefix; | |
prefix.reserve(upto_rows); | |
for (int col = 0; col < cols; col++) { | |
// Check each column if it is a valid prefix. | |
for (int row = 0; row <= upto_rows; row++) | |
prefix.push_back(rectangle[row][col]); | |
if (!prefixes.find(prefix)) | |
return false; | |
prefix.resize(0); | |
} | |
return true; | |
} | |
bool build_rectangle(int rows_left) { | |
if (rows_left == 0) | |
return true; | |
size_t n = rectangle.size() - rows_left; | |
for (const string& w : dict[cols]) { | |
// Fill nth row with word, check feasibility, and continue. | |
rectangle[n] = w; | |
words_count++; | |
if (check_feasible(n)) { | |
bool ok = build_rectangle(rows_left-1); | |
if (ok) | |
return true; | |
continue; | |
} | |
} | |
return false; | |
} | |
}; | |
int main(void) { | |
unordered_set<string> dict = read_dictionary("words.txt"); | |
vector<unordered_set<string>> by_len = by_length(dict); | |
vector<string> ra; | |
int longest = by_len.size()-1; | |
long words_count = 0; | |
for (int cols = longest; cols > 0; cols--) { | |
Trie t(by_len[cols]); | |
for (int rows = longest; rows > 1; rows--) { | |
Rectangle r(rows, cols, t, by_len); | |
if (r.build_rectangle(rows)) { | |
ra = r.get(); | |
words_count += r.words_count; | |
goto out; | |
} | |
words_count += r.words_count; | |
} | |
} | |
out: | |
if (ra.size() > 0) { | |
cout << "found rectangle!\n"; | |
for (size_t i = 0; i < ra.size(); i++) { | |
cout << ra[i] << "\n"; | |
} | |
} | |
cout << "Searched " << words_count << " words\n"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment