Created
April 27, 2020 01:25
-
-
Save 0x0kurooo/ca0f5707505b1ee477a5eed2a969beca to your computer and use it in GitHub Desktop.
Word Count Engine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string> | |
#include <vector> | |
#include <unordered_map> | |
#include <map> | |
using namespace std; | |
// Time complexity: O(N) for making occ map, O(logN) for making sorted map, and O(N) for buil the answer. So total is O(2N + logN) | |
// Space complexity: O(3N) for 2 map and the answer | |
vector<vector<string>> wordCountEngine( const string& origin_document ) | |
{ | |
// Add trailing space in the end of origin string | |
string document = origin_document; | |
document += " "; | |
// Check if input empty | |
vector<vector<string>> ans; | |
int n = document.length(); | |
if (n == 0) return ans; | |
unordered_map<string, pair<int, int>> occ; | |
int idx = 0; | |
// Start from begin of string, if we see a whitespace | |
// lookback to previous whitespace index | |
for (int i = 0; i < n; i++) { | |
if (document[i] == ' ') { | |
string subStr = ""; | |
// And make a substring from valid character | |
for (int j = idx; j <= i; j++) { | |
if (isalpha(document[j])) { | |
subStr += tolower(document[j]); | |
} | |
} | |
if (subStr.length() == 0) continue; | |
// Foreach substring, we keep track its first index | |
// and it occurrences | |
if (occ.find(subStr) == occ.end()) { | |
occ[subStr] = {idx, 0}; | |
} | |
occ[subStr] = {occ[subStr].first, occ[subStr].second + 1}; | |
idx = i + 1; | |
} | |
} | |
// After we have collect all substring data | |
// we need sorted it in order, so I use a map of map | |
// with the first key is the substring occurences and the second | |
// key is the substring first index | |
map<int, map<int, string>> sorted; | |
for (auto &it : occ) { | |
string subStr = it.first; | |
int _occ = it.second.second; | |
int _idx = it.second.first; | |
if (sorted.find(_occ) == sorted.end()) { | |
sorted[_occ] = map<int, string>(); | |
} | |
sorted[_occ][_idx] = subStr; | |
} | |
// Then i just make answer by travel over this sorted map | |
for (auto &it : sorted) { | |
// Note: in this sub map, the index are reversed sorted | |
for (auto sub_it = it.second.rbegin(); sub_it != it.second.rend(); ++sub_it) { | |
vector<string> temp; | |
temp.push_back(sub_it->second); | |
temp.push_back(to_string(it.first)); | |
ans.insert(ans.begin(), temp); | |
} | |
} | |
return ans; | |
} | |
int main() { | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment