Created
May 24, 2016 17:47
-
-
Save mcleary/9aeeb1c4627022ab022b2ffe1995578d to your computer and use it in GitHub Desktop.
O(n) string shortest subsegment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath> | |
#include <cstdio> | |
#include <vector> | |
#include <deque> | |
#include <set> | |
#include <unordered_set> | |
#include <unordered_map> | |
#include <string> | |
#include <iostream> | |
#include <iterator> | |
#include <sstream> | |
#include <fstream> | |
#include <algorithm> | |
using namespace std; | |
string to_lower(string input) | |
{ | |
transform(input.begin(), input.end(), input.begin(), ::tolower); | |
return input; | |
} | |
vector<string> tokens; | |
vector<string> tokens_bkp; | |
unordered_set<string> words; | |
unordered_map<string, int> freq; | |
int head = 0; | |
int tail = 0; | |
int min_head = 0; | |
int min_tail = 0; | |
int shortest_sub_segment = 0; | |
bool is_word_valid(const string& token) | |
{ | |
return words.find(token) != words.end(); | |
} | |
bool is_sentence_valid() | |
{ | |
bool is_valid = true; | |
for(auto freq_pair : freq) | |
{ | |
is_valid &= freq_pair.second > 0; | |
} | |
return is_valid; | |
} | |
void update_freq(int head_displacement = 0, int tail_displacement = 0) | |
{ | |
if(head_displacement == 0 && tail_displacement == 0) | |
{ | |
for (auto& freq_pair : freq) | |
{ | |
freq_pair.second = 0; | |
} | |
for (int i = head; i <= tail; ++i) | |
{ | |
auto token = tokens[i]; | |
if (is_word_valid(token)) | |
{ | |
freq[token]++; | |
} | |
} | |
} | |
else | |
{ | |
if(head_displacement > 0) | |
{ | |
auto previous_head_token = tokens[head - 1]; | |
if (is_word_valid(previous_head_token)) | |
{ | |
freq[previous_head_token]--; | |
} | |
} | |
if(tail_displacement > 0) | |
{ | |
auto tail_token = tokens[tail]; | |
if (is_word_valid(tail_token)) | |
{ | |
freq[tail_token]++; | |
} | |
} | |
} | |
} | |
void update_shortest_subsegment() | |
{ | |
int length = tail - head + 1; | |
if (length < shortest_sub_segment) | |
{ | |
shortest_sub_segment = length; | |
min_head = head; | |
min_tail = tail; | |
} | |
} | |
void print_answer() | |
{ | |
for (int i = min_head; i <= min_tail; ++i) | |
{ | |
auto token = tokens_bkp[i]; | |
cout << token << " "; | |
} | |
cout << endl; | |
exit(0); | |
} | |
void print_no_answer() | |
{ | |
cout << "NO SUBSEGMENT FOUND" << endl; | |
exit(0); | |
} | |
int main() | |
{ | |
string paragraph; | |
getline(cin, paragraph); | |
paragraph.erase(remove_if(paragraph.begin(), paragraph.end(), [](char c) { return c == '.'; }), paragraph.end()); | |
istringstream paragraph_stream(paragraph); | |
int word_count; | |
cin >> word_count; | |
tokens_bkp = vector<string>{ istream_iterator<string>{paragraph_stream}, istream_iterator<string>{} }; | |
tokens.reserve(tokens_bkp.size()); | |
for (auto token : tokens_bkp) | |
{ | |
tokens.push_back(to_lower(token)); | |
} | |
for (int i = 0; i < word_count; ++i) | |
{ | |
string word; | |
cin >> word; | |
word = to_lower(word); | |
words.insert(word); | |
freq[word] = 0; | |
} | |
for (int i = 0; i < tokens.size(); ++i) | |
{ | |
auto token = tokens[i]; | |
if (is_word_valid(token)) | |
{ | |
head = i; | |
break; | |
} | |
} | |
tail = head + (int)words.size(); | |
update_freq(); | |
while(!is_sentence_valid()) | |
{ | |
tail++; | |
if(tail == tokens.size()) | |
{ | |
print_no_answer(); | |
} | |
update_freq(0, 1); | |
} | |
shortest_sub_segment = tail - head + 1; | |
min_head = head; | |
min_tail = tail; | |
for (int i = tail; i < tokens.size(); ++i) | |
{ | |
while (head <= tail) | |
{ | |
head++; | |
update_freq(1, 0); | |
auto token = tokens[head]; | |
if (is_word_valid(token) && freq[token] == 1) | |
{ | |
break; | |
} | |
} | |
if (is_sentence_valid()) | |
{ | |
update_shortest_subsegment(); | |
} | |
else | |
{ | |
while (tail < tokens.size() && !is_sentence_valid()) | |
{ | |
tail++; | |
if (tail == tokens.size()) | |
{ | |
break; | |
} | |
update_freq(0, 1); | |
} | |
update_shortest_subsegment(); | |
} | |
if (tail == tokens.size()) | |
{ | |
break; | |
} | |
} | |
if (shortest_sub_segment >= tokens.size()) | |
{ | |
print_no_answer(); | |
} | |
else | |
{ | |
print_answer(); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment