Created
April 30, 2015 06:54
Revisions
-
Jefffrey created this gist
Apr 30, 2015 .There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,131 @@ #include <iostream> #include <fstream> #include <map> #include <stdexcept> #include <vector> namespace a8 { // aliases using word = std::string; using dict = std::map<word, unsigned int>; // generates all similar strings by removing 1 character // from the original string in different places template<typename OutIt> OutIt generate_similar_by_deletion(a8::word const& word, OutIt first) { std::cout << "Generating by deletetion...\n"; for (std::size_t i = 0; i < word.size() - 1; i++) { a8::word curr = word; curr.erase(i, 1); std::cout << "\tGenerated: " << curr << '\n'; *first++ = curr; } return first; } // generates all similar strings by substituting 1 character // from the alphabet in different positions template<typename OutIt> OutIt generate_similar_by_substitution(a8::word const& word, OutIt first) { std::cout << "Generating by substitution...\n"; for (std::size_t i = 0; i < word.size(); i++) { for (char c = 'A'; c <= 'Z'; c++) { a8::word curr = word; curr.replace(i, 1, 1, c); // inserts for 1 time at position i the character c for 1 time std::cout << "\tGenerated: " << curr << '\n'; *first++ = curr; } } return first; } // generates all similar strings by inserting 1 character // from the alphabet in different positions template<typename OutIt> OutIt generate_similar_by_insertion(a8::word const& word, OutIt first) { std::cout << "Generating by insertion...\n"; for (std::size_t i = 0; i < word.size() + 1; i++) { for (char c = 'A'; c <= 'Z'; c++) { a8::word curr = word; curr.insert(i, 1, c); // inserts at i std::cout << "\tGenerated: " << curr << '\n'; *first++ = curr; } } return first; } // generates all similar string with distance of 1, // by deletion, substitution and insertion template<typename OutIt> OutIt generate_similar(a8::word const& word, OutIt begin) { auto begin1 = generate_similar_by_deletion(word, begin); auto begin2 = generate_similar_by_substitution(word, begin1); auto begin3 = generate_similar_by_insertion(word, begin2); return begin3; } // returns a copy of the value with the given key or // the passed in value if not found template<typename Dict, typename Key, typename Value> Value at_or(Dict const& map, Key const& key, Value other) { try { return map.at(key); } catch (std::out_of_range const&) { return other; } } // finds the iterator to the best match in the dictionary if it exists, // otherwise it returns the end iterator template<typename Dict> typename Dict::iterator best_match(Dict& dictionary, a8::word const& word) { auto it = dictionary.find(word); if (it != dictionary.end()) return it; std::vector<a8::word> similar; generate_similar(word, std::back_inserter(similar)); auto similar_it = std::max_element(similar.begin(), similar.end(), [&dictionary](a8::word const& a, a8::word const& b) { return at_or(dictionary, a, 0u) < at_or(dictionary, b, 0u); // @todo: probably not optimal }); if (similar_it == similar.end()) return dictionary.end(); else return dictionary.find(*similar_it); } // takes an input stream with space separated pairs // representing the word and the frequency of such word a8::dict make_dictionary(std::istream& input) { a8::word word; unsigned int count; a8::dict dictionary; while (input >> word && input >> count) { dictionary[word] = count; } return dictionary; } // runs the user interface for the program // asking for words and replying to them one by one void run_corrector(dict& dictionary) { std::string word; while (std::cin >> word) { auto it = best_match(dictionary, word); if (it != dictionary.end()) { std::cout << it->first << ' ' << it->second << '\n'; it->second++; } else { std::cout << "-\n"; dictionary[word] = 1; } } } } int main(int argc, char* argv[]) { if (argc != 2) throw std::runtime_error("Filename not passed in"); std::string filename = argv[1]; std::ifstream filestream(filename); a8::dict dictionary = a8::make_dictionary(filestream); a8::run_corrector(dictionary); return 0; }