Created
March 3, 2017 16:31
-
-
Save romamik/a9c7b44c7bddb3d3303da39188c26bc4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <locale>
#include <map>
#include <string>
#include <vector>
/// Counts case-insensitive word occurrences read from a file stream.
///
/// A word is a maximal run of characters that are alphabetic in the classic
/// "C" locale; every other character acts as a separator. Each word is
/// lower-cased before it is counted.
///
/// @param in      Stream to read, from its current position until a read fails.
/// @param result  Word -> count map; counts are added to any existing entries.
void countWords(std::ifstream &in, std::map<std::string, int> &result) {
    const std::locale &loc = std::locale::classic();
    std::string word;
    char ch;
    // Loop on the read itself rather than on eof(): this never processes the
    // EOF sentinel as a character and also terminates if the stream fails
    // without reaching end-of-file.
    while (in.get(ch)) {
        // Classify as `char`: the locale only guarantees a ctype<char> facet,
        // whereas the int returned by the no-argument get() would select
        // ctype<int>.
        if (std::isalpha(ch, loc)) {
            word.push_back(std::tolower(ch, loc));
        }
        else if (!word.empty()) {
            ++result[word];
            word.clear();
        }
    }
    // The input may end in the middle of a word; count that last word too.
    if (!word.empty()) {
        ++result[word];
    }
}
/// Ordering predicate for (word, count) pairs.
///
/// Pairs with a larger count come first; pairs with equal counts are ordered
/// by word, ascending.
///
/// Parameters are taken by const reference so the predicate accepts const
/// objects and temporaries, as comparators passed to standard algorithms are
/// expected to.
///
/// @return true when `a` must be placed before `b`.
bool comparePairs(const std::pair<std::string, int> &a, const std::pair<std::string, int> &b) {
    if (a.second != b.second) {
        return a.second > b.second;
    }
    return a.first < b.first;
}
/// Appends every (word, count) entry of `wordMap` to `pairList`, then sorts
/// the whole of `pairList` using comparePairs as the ordering predicate.
///
/// @param wordMap   Source of the (word, count) entries; it is only read.
/// @param pairList  Receives the entries; anything already in it is kept and
///                  takes part in the sort.
void sortWords(std::map<std::string, int> &wordMap, std::vector<std::pair<std::string, int>> &pairList) {
    for (const auto &entry : wordMap) {
        pairList.emplace_back(entry.first, entry.second);
    }
    std::sort(pairList.begin(), pairList.end(), comparePairs);
}
int main(int argc, char* argv[]) { | |
if (argc != 3) { | |
std::cout << "usage: freqs [input] [output]"; | |
} | |
else { | |
auto inFilename = argv[1]; | |
auto outFilename = argv[2]; | |
std::ifstream in; | |
in.open(inFilename); | |
if (!in.is_open()) { | |
std::cout << "cannot open file '" << inFilename << "'"; | |
} | |
else { | |
std::map<std::string, int> wordMap; | |
countWords(in, wordMap); | |
std::vector<std::pair<std::string, int>> pairList; | |
sortWords(wordMap, pairList); | |
std::ofstream out; | |
out.open(outFilename); | |
if (!out.is_open()) { | |
std::cout << "cannot open file '" << outFilename << "'"; | |
} | |
else { | |
for (auto pair = pairList.begin(); pair != pairList.end(); ++pair) { | |
out << pair->second << ' ' << pair->first << std::endl; | |
} | |
} | |
} | |
} | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment