Skip to content

Instantly share code, notes, and snippets.

@legnaleurc
Last active February 18, 2020 07:59
Show Gist options
  • Save legnaleurc/6061259 to your computer and use it in GitHub Desktop.
Save legnaleurc/6061259 to your computer and use it in GitHub Desktop.
std::vector<std::string> findSubstrings(std::vector<std::string> words, std::vector<std::string> parts) {
sort(begin(parts), end(parts), [](const std::string & a, const std::string & b) -> bool {
return a.size() > b.size();
});
for (int i = 0; i < words.size(); ++i) {
words[i] = highlight(words[i], parts);
}
return words;
}
std::string highlight(
const std::string & word,
const std::vector<std::string> & parts
) {
auto offset = std::string::npos;
auto length = 0;
std::string_view tmp = word;
for (const auto & keyword : parts) {
if (keyword.size() < length) {
continue;
}
auto pos = tmp.find(keyword);
if (pos == std::string::npos) {
continue;
}
if (pos >= offset) {
continue;
}
offset = pos;
length = keyword.size();
tmp = tmp.substr(0, offset + length);
}
if (offset == std::string::npos) {
return word;
}
auto rv = word;
rv.insert(offset + length, "]");
rv.insert(offset, "[");
return rv;
}
struct Node;
using NodeSP = std::shared_ptr<Node>;
struct Node {
Node (): terminal(false) {}
bool terminal;
std::unordered_map<char, NodeSP> children;
};
class Trie {
NodeSP root;
public:
Trie(): root(std::make_shared<Node>()) {}
void insert(const std::string & key) {
auto node = this->root;
for (char c : key) {
auto it = node->children.find(c);
if (it == node->children.end()) {
auto newNode = std::make_shared<Node>();
node->children.insert({ c, newNode });
node = newNode;
} else {
node = it->second;
}
}
node->terminal = true;
}
std::tuple<int, int> find(const std::string & word) const {
int bestLength = 0;
int bestOffset = 0;
for (int i = 0; i < word.size(); ++i) {
auto tmp = this->find(word, i);
auto offset = get<0>(tmp);
auto length = get<1>(tmp);
if (length > bestLength) {
bestOffset = offset;
bestLength = length;
}
}
return { bestOffset, bestLength };
}
private:
std::tuple<int, int> find(const std::string & word, int k) const {
if (k >= word.size()) {
return { 0, 0 };
}
int bestLength = 0;
int bestOffset = 0;
int length = 0;
int offset = 0;
auto node = this->root;
for (int i = k; i < word.size(); ++i) {
char c = word[i];
auto it = node->children.find(c);
if (it == node->children.end()) {
if (node->terminal && length > bestLength) {
bestLength = length;
bestOffset = offset;
}
if (length > 0) {
i = offset;
}
length = 0;
node = this->root;
continue;
}
if (length == 0) {
offset = i;
}
++length;
node = it->second;
}
if (node->terminal && length > bestLength) {
bestLength = length;
bestOffset = offset;
}
return { bestOffset, bestLength };
}
};
std::string highlight(
const std::string & word,
const Trie & trie
) {
auto range_ = trie.find(word);
auto offset = get<0>(range_);
auto length = get<1>(range_);
if (length <= 0) {
return word;
}
auto rv = word;
rv.insert(offset + length, "]");
rv.insert(offset, "[");
return rv;
}
std::vector<std::string> findSubstrings(std::vector<std::string> words, std::vector<std::string> parts) {
Trie trie;
for (const auto & part : parts) {
trie.insert(part);
}
for (auto & word : words) {
word = highlight(word, trie);
}
return words;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment