Skip to content

Instantly share code, notes, and snippets.

@jin-x
Created December 4, 2019 20:42
Show Gist options
  • Save jin-x/9aaf1d168649f434c8f02f390a98731f to your computer and use it in GitHub Desktop.
Save jin-x/9aaf1d168649f434c8f02f390a98731f to your computer and use it in GitHub Desktop.
Word Counting
#include <algorithm>
#include <cctype>
#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
using std::cout;
using std::cerr;
using std::cin;
using std::endl;
using std::string;
using Clock = std::chrono::steady_clock;
// Accumulates word-frequency statistics over lines of text.
//
// A "word" is a maximal run of lowercase ASCII letters ('a'..'z'). Every other
// character -- digits, punctuation, whitespace and also UPPERCASE letters --
// acts as a separator, so callers that want case-insensitive counting must
// lowercase the text before passing it in.
class WordCounter
{
public:
    WordCounter() : total_count(0), dif_count(0), string_count(0) {}

    // Record one occurrence of the word `s` (no splitting or validation).
    void count_word(const std::string& s);

    // Split `s` into words and record each of them; counts as one line.
    void count_string(const std::string& s);

    unsigned int total_count;   // words recorded so far, repeats included
    unsigned int dif_count;     // bumped each time a word's count becomes 1
    unsigned int string_count;  // number of count_string() calls
    std::unordered_map<std::string, int> word_map;  // word -> occurrences
};

// Count single word
void WordCounter::count_word(const std::string& s)
{
    ++total_count;

    // operator[] value-initialises the counter to 0 for a new word, so the
    // counter reads exactly 1 right after the first occurrence.
    int& occurrences = word_map[s];
    ++occurrences;
    if (occurrences == 1) {
        ++dif_count;
    }
}

// Count all words in string
void WordCounter::count_string(const std::string& s)
{
    ++string_count;

    // Single definition of what belongs to a word (lowercase ASCII only).
    const auto is_word_char = [](char ch) { return ch >= 'a' && ch <= 'z'; };

    const auto end = s.end();
    auto cur = s.begin();
    std::string word;  // declared outside the loop so its buffer is reused

    while (cur != end) {
        // Algorithms are std::-qualified on purpose: an unqualified call only
        // resolves via ADL, which fails when the string iterator is a raw
        // pointer.
        cur = std::find_if(cur, end, is_word_char);
        if (cur == end) {
            break;  // only separators were left
        }
        const auto last = std::find_if_not(cur, end, is_word_char);
        word.assign(cur, last);
        count_word(word);
        cur = last;
    }
}
// Main
int main(int argc, char* argv[])
{
if (argc < 2) {
cout << "usage:\n"
" word_count infile.txt [outfile.txt]" << endl;
return 1;
}
WordCounter wc;
Clock::time_point clock_start, clock_end;
double elapsed_time;
// Open source file
{
std::ifstream infile(argv[1]);
if (!infile) {
cerr << "Input file open error!" << endl;
return 1;
}
// Read and process file
clock_start = Clock::now();
string s;
while (getline(infile, s)) {
std::transform(s.begin(), s.end(), s.begin(), tolower);
wc.count_string(s);
}
clock_end = Clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::duration<double>>(clock_end - clock_start).count();
}
// Word counts
if (argc >= 3) {
std::ofstream outfile(argv[2]);
if (!outfile) {
cerr << "Output file create error!" << endl;
} else {
for (auto& word : wc.word_map) {
outfile << word.first << " = " << word.second << "\n";
if (!outfile) {
cerr << "Output file write error!" << endl;
break;
}
}
outfile << "\nNumber of lines = " << wc.string_count << "\n";
outfile << "Number of words total = " << wc.total_count << "\n";
outfile << "Number of different words = " << wc.dif_count << "\n";
outfile << "Elapsed time: " << elapsed_time << " seconds\n";
}
}
// Statistics
cout << "Number of lines = " << wc.string_count << "\n";
cout << "Number of words total = " << wc.total_count << "\n";
cout << "Number of different words = " << wc.dif_count << "\n";
cout << "Elapsed time: " << elapsed_time << " seconds" << endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment