Created
December 19, 2019 09:09
-
-
Save JustSlavic/283030aac2a04184e07f028fc9cb26eb to your computer and use it in GitHub Desktop.
Getting frequencies of words (ASCII)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
/// Word -> occurrence-count table.
///
/// Entries are kept in first-seen order until sort() is called, after which
/// they are ordered by `cmp`. Words may still be pushed after sorting.
class dictionary {
public:
    /// A word paired with the number of times it has been pushed.
    using value_t = std::pair<std::string, std::size_t>;

    /// Ordering used by sort(): larger counts first; entries with equal
    /// counts are ordered by ascending word.
    std::function<bool(const value_t&, const value_t&)> cmp = [](const value_t& lhs, const value_t& rhs) -> bool {
        if (lhs.second == rhs.second) {
            return lhs.first < rhs.first;
        }
        return lhs.second > rhs.second;
    };

    /// Records one occurrence of `value`, appending a new entry with count 1
    /// the first time the word is seen.
    void push(const std::string& value) {
        // Hash lookup instead of scanning the whole vector for every word.
        const auto found = index.find(value);
        if (found == index.end()) {
            container.emplace_back(value, 1);
            index.emplace(value, container.size() - 1);
        } else {
            ++container[found->second].second;
        }
    }

    /// Reorders the stored entries according to `cmp`.
    void sort() {
        std::sort(container.begin(), container.end(), cmp);
        // Sorting moved the entries around, so refresh every stored slot;
        // otherwise a later push() would increment the wrong word.
        for (std::size_t slot = 0; slot < container.size(); ++slot) {
            index[container[slot].first] = slot;
        }
    }

    /// Returns a copy of the entries in their current order.
    std::vector<value_t> to_vector() const {
        return container;
    }

private:
    /// The (word, count) entries in their current order.
    std::vector<value_t> container;
    /// Maps each stored word to its current position in `container`.
    std::unordered_map<std::string, std::size_t> index;
};
/// Splits `input` into words and passes each one to `callback`.
///
/// A word is a maximal run of characters for which std::isalpha is true;
/// every character is lower-cased with std::tolower before being stored.
/// All other characters act as separators and are discarded.
///
/// @param input    Stream to read from; consumed until a read fails.
/// @param callback Invoked once per word, in order of appearance. The
///                 string it receives is a scratch buffer that is cleared
///                 after the call returns.
void apply_to_words(std::istream& input, const std::function<void(std::string&)>& callback) {
    using traits = std::istream::traits_type;

    std::string buffer;
    // Loop on the value returned by get() rather than on eof(): get() yields
    // traits::eof() for any failed read, so a stream that enters a fail/bad
    // state without reaching end-of-file cannot spin forever.
    for (auto raw = input.get(); raw != traits::eof(); raw = input.get()) {
        // The <cctype> functions require a value representable as
        // unsigned char; passing a negative char is undefined behaviour.
        const auto byte = static_cast<unsigned char>(raw);
        if (std::isalpha(byte)) {
            buffer.push_back(static_cast<char>(std::tolower(byte)));
        } else if (!buffer.empty()) {
            callback(buffer);
            buffer.clear();
        }
    }
    // Flush a word that runs up to the end of the stream.
    if (!buffer.empty()) {
        callback(buffer);
    }
}
std::vector<std::pair<std::string, size_t>> count_words(std::istream& input) { | |
dictionary dict; | |
apply_to_words(input, [&dict](std::string& word) { dict.push(word); }); | |
dict.sort(); | |
return dict.to_vector(); | |
} | |
int main(int argc, char** argv) { | |
if (argc < 3) { | |
std::cout << "usage: freq <input file> <output file>" << std::endl; | |
return EXIT_FAILURE; | |
} | |
std::ifstream input(argv[1]); | |
if (!input.is_open()) { | |
std::cerr << "Cannot open the input file"; | |
return 1; | |
} | |
std::ofstream output(argv[2]); | |
if (!output.is_open()) { | |
std::cerr << "Cannot open the output file"; | |
return 1; | |
} | |
auto result = count_words(input); | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment