Skip to content

Instantly share code, notes, and snippets.

@mcleary
Created May 24, 2016 17:47
Show Gist options
  • Save mcleary/9aeeb1c4627022ab022b2ffe1995578d to your computer and use it in GitHub Desktop.
Save mcleary/9aeeb1c4627022ab022b2ffe1995578d to your computer and use it in GitHub Desktop.
O(n) string shortest subsegment
#include <cmath>
#include <cstdio>
#include <vector>
#include <deque>
#include <set>
#include <unordered_set>
#include <unordered_map>
#include <string>
#include <iostream>
#include <iterator>
#include <sstream>
#include <fstream>
#include <algorithm>
using namespace std;
string to_lower(string input)
{
transform(input.begin(), input.end(), input.begin(), ::tolower);
return input;
}
vector<string> tokens;
vector<string> tokens_bkp;
unordered_set<string> words;
unordered_map<string, int> freq;
int head = 0;
int tail = 0;
int min_head = 0;
int min_tail = 0;
int shortest_sub_segment = 0;
bool is_word_valid(const string& token)
{
return words.find(token) != words.end();
}
bool is_sentence_valid()
{
bool is_valid = true;
for(auto freq_pair : freq)
{
is_valid &= freq_pair.second > 0;
}
return is_valid;
}
void update_freq(int head_displacement = 0, int tail_displacement = 0)
{
if(head_displacement == 0 && tail_displacement == 0)
{
for (auto& freq_pair : freq)
{
freq_pair.second = 0;
}
for (int i = head; i <= tail; ++i)
{
auto token = tokens[i];
if (is_word_valid(token))
{
freq[token]++;
}
}
}
else
{
if(head_displacement > 0)
{
auto previous_head_token = tokens[head - 1];
if (is_word_valid(previous_head_token))
{
freq[previous_head_token]--;
}
}
if(tail_displacement > 0)
{
auto tail_token = tokens[tail];
if (is_word_valid(tail_token))
{
freq[tail_token]++;
}
}
}
}
void update_shortest_subsegment()
{
int length = tail - head + 1;
if (length < shortest_sub_segment)
{
shortest_sub_segment = length;
min_head = head;
min_tail = tail;
}
}
void print_answer()
{
for (int i = min_head; i <= min_tail; ++i)
{
auto token = tokens_bkp[i];
cout << token << " ";
}
cout << endl;
exit(0);
}
void print_no_answer()
{
cout << "NO SUBSEGMENT FOUND" << endl;
exit(0);
}
int main()
{
string paragraph;
getline(cin, paragraph);
paragraph.erase(remove_if(paragraph.begin(), paragraph.end(), [](char c) { return c == '.'; }), paragraph.end());
istringstream paragraph_stream(paragraph);
int word_count;
cin >> word_count;
tokens_bkp = vector<string>{ istream_iterator<string>{paragraph_stream}, istream_iterator<string>{} };
tokens.reserve(tokens_bkp.size());
for (auto token : tokens_bkp)
{
tokens.push_back(to_lower(token));
}
for (int i = 0; i < word_count; ++i)
{
string word;
cin >> word;
word = to_lower(word);
words.insert(word);
freq[word] = 0;
}
for (int i = 0; i < tokens.size(); ++i)
{
auto token = tokens[i];
if (is_word_valid(token))
{
head = i;
break;
}
}
tail = head + (int)words.size();
update_freq();
while(!is_sentence_valid())
{
tail++;
if(tail == tokens.size())
{
print_no_answer();
}
update_freq(0, 1);
}
shortest_sub_segment = tail - head + 1;
min_head = head;
min_tail = tail;
for (int i = tail; i < tokens.size(); ++i)
{
while (head <= tail)
{
head++;
update_freq(1, 0);
auto token = tokens[head];
if (is_word_valid(token) && freq[token] == 1)
{
break;
}
}
if (is_sentence_valid())
{
update_shortest_subsegment();
}
else
{
while (tail < tokens.size() && !is_sentence_valid())
{
tail++;
if (tail == tokens.size())
{
break;
}
update_freq(0, 1);
}
update_shortest_subsegment();
}
if (tail == tokens.size())
{
break;
}
}
if (shortest_sub_segment >= tokens.size())
{
print_no_answer();
}
else
{
print_answer();
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment