Created
December 4, 2019 20:42
-
-
Save jin-x/9aaf1d168649f434c8f02f390a98731f to your computer and use it in GitHub Desktop.
Word Counting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <cctype>
#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
using std::cout; | |
using std::cerr; | |
using std::cin; | |
using std::endl; | |
using std::string; | |
using Clock = std::chrono::steady_clock; | |
// Accumulates word-frequency statistics over text supplied one string at a time.
//
// A "word" is a maximal run of lowercase ASCII letters 'a'..'z'. Every other
// character -- uppercase letters included -- acts as a separator, so callers
// that want case-insensitive counting must lowercase the text before passing
// it to count_string().
class WordCounter
{
public:
    WordCounter() : total_count(0), dif_count(0), string_count(0) {}

    void count_word(const std::string& s);
    void count_string(const std::string& s);

    unsigned int total_count;   // number of count_word() calls (duplicates included)
    unsigned int dif_count;     // incremented whenever a word's count first reaches 1
    unsigned int string_count;  // number of count_string() calls
    std::unordered_map<std::string, int> word_map;  // word -> occurrence count
};

// Count single word
//
// Records one occurrence of `s` verbatim (no validation or case folding).
void WordCounter::count_word(const std::string& s)
{
    ++total_count;
    // operator[] value-initialises a missing entry to 0, so a result of 1
    // after the increment means this word has not been seen before.
    if (++word_map[s] == 1) { ++dif_count; }
}

// Count all words in string
//
// Bumps string_count once, then passes every maximal run of 'a'..'z' in `s`
// to count_word(). An empty string or one without lowercase letters still
// counts as a string but contributes no words.
void WordCounter::count_string(const std::string& s)
{
    ++string_count;

    const auto is_word_char = [](char ch) { return ch >= 'a' && ch <= 'z'; };

    // The algorithms are qualified with std:: on purpose: unqualified calls
    // only compile when ADL finds them through the iterator type, which is
    // not guaranteed (the iterator may legally be a plain pointer).
    const auto end = s.end();
    auto cur = std::find_if(s.begin(), end, is_word_char);
    while (cur != end) {
        const auto last = std::find_if_not(cur, end, is_word_char);
        count_word(std::string(cur, last));
        cur = std::find_if(last, end, is_word_char);
    }
}
// Main | |
int main(int argc, char* argv[]) | |
{ | |
if (argc < 2) { | |
cout << "usage:\n" | |
" word_count infile.txt [outfile.txt]" << endl; | |
return 1; | |
} | |
WordCounter wc; | |
Clock::time_point clock_start, clock_end; | |
double elapsed_time; | |
// Open source file | |
{ | |
std::ifstream infile(argv[1]); | |
if (!infile) { | |
cerr << "Input file open error!" << endl; | |
return 1; | |
} | |
// Read and process file | |
clock_start = Clock::now(); | |
string s; | |
while (getline(infile, s)) { | |
std::transform(s.begin(), s.end(), s.begin(), tolower); | |
wc.count_string(s); | |
} | |
clock_end = Clock::now(); | |
elapsed_time = std::chrono::duration_cast<std::chrono::duration<double>>(clock_end - clock_start).count(); | |
} | |
// Word counts | |
if (argc >= 3) { | |
std::ofstream outfile(argv[2]); | |
if (!outfile) { | |
cerr << "Output file create error!" << endl; | |
} else { | |
for (auto& word : wc.word_map) { | |
outfile << word.first << " = " << word.second << "\n"; | |
if (!outfile) { | |
cerr << "Output file write error!" << endl; | |
break; | |
} | |
} | |
outfile << "\nNumber of lines = " << wc.string_count << "\n"; | |
outfile << "Number of words total = " << wc.total_count << "\n"; | |
outfile << "Number of different words = " << wc.dif_count << "\n"; | |
outfile << "Elapsed time: " << elapsed_time << " seconds\n"; | |
} | |
} | |
// Statistics | |
cout << "Number of lines = " << wc.string_count << "\n"; | |
cout << "Number of words total = " << wc.total_count << "\n"; | |
cout << "Number of different words = " << wc.dif_count << "\n"; | |
cout << "Elapsed time: " << elapsed_time << " seconds" << endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment