Skip to content

Instantly share code, notes, and snippets.

@shoooe
Created April 30, 2015 06:54

Revisions

  1. @Jefffrey Jefffrey created this gist Apr 30, 2015.
    131 changes: 131 additions & 0 deletions a8.cpp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,131 @@
    #include <iostream>
    #include <fstream>
    #include <map>
    #include <stdexcept>
    #include <vector>

    namespace a8 {

    // aliases
    using word = std::string;
    using dict = std::map<word, unsigned int>;

    // generates all similar strings by removing 1 character
    // from the original string in different places
    template<typename OutIt>
    OutIt generate_similar_by_deletion(a8::word const& word, OutIt first) {
    std::cout << "Generating by deletetion...\n";
    for (std::size_t i = 0; i < word.size() - 1; i++) {
    a8::word curr = word;
    curr.erase(i, 1);
    std::cout << "\tGenerated: " << curr << '\n';
    *first++ = curr;
    }
    return first;
    }

    // generates all similar strings by substituting 1 character
    // from the alphabet in different positions
    template<typename OutIt>
    OutIt generate_similar_by_substitution(a8::word const& word, OutIt first) {
    std::cout << "Generating by substitution...\n";
    for (std::size_t i = 0; i < word.size(); i++) {
    for (char c = 'A'; c <= 'Z'; c++) {
    a8::word curr = word;
    curr.replace(i, 1, 1, c); // inserts for 1 time at position i the character c for 1 time
    std::cout << "\tGenerated: " << curr << '\n';
    *first++ = curr;
    }
    }
    return first;
    }

    // generates all similar strings by inserting 1 character
    // from the alphabet in different positions
    template<typename OutIt>
    OutIt generate_similar_by_insertion(a8::word const& word, OutIt first) {
    std::cout << "Generating by insertion...\n";
    for (std::size_t i = 0; i < word.size() + 1; i++) {
    for (char c = 'A'; c <= 'Z'; c++) {
    a8::word curr = word;
    curr.insert(i, 1, c); // inserts at i
    std::cout << "\tGenerated: " << curr << '\n';
    *first++ = curr;
    }
    }
    return first;
    }

    // generates all similar string with distance of 1,
    // by deletion, substitution and insertion
    template<typename OutIt>
    OutIt generate_similar(a8::word const& word, OutIt begin) {
    auto begin1 = generate_similar_by_deletion(word, begin);
    auto begin2 = generate_similar_by_substitution(word, begin1);
    auto begin3 = generate_similar_by_insertion(word, begin2);
    return begin3;
    }

    // returns a copy of the value with the given key or
    // the passed in value if not found
    template<typename Dict, typename Key, typename Value>
    Value at_or(Dict const& map, Key const& key, Value other) {
    try { return map.at(key); }
    catch (std::out_of_range const&) { return other; }
    }

    // finds the iterator to the best match in the dictionary if it exists,
    // otherwise it returns the end iterator
    template<typename Dict>
    typename Dict::iterator best_match(Dict& dictionary, a8::word const& word) {
    auto it = dictionary.find(word);
    if (it != dictionary.end()) return it;

    std::vector<a8::word> similar;
    generate_similar(word, std::back_inserter(similar));
    auto similar_it = std::max_element(similar.begin(), similar.end(),
    [&dictionary](a8::word const& a, a8::word const& b) {
    return at_or(dictionary, a, 0u) < at_or(dictionary, b, 0u); // @todo: probably not optimal
    });
    if (similar_it == similar.end()) return dictionary.end();
    else return dictionary.find(*similar_it);
    }

    // takes an input stream with space separated pairs
    // representing the word and the frequency of such word
    a8::dict make_dictionary(std::istream& input) {
    a8::word word;
    unsigned int count;
    a8::dict dictionary;
    while (input >> word && input >> count) {
    dictionary[word] = count;
    }
    return dictionary;
    }

    // runs the user interface for the program
    // asking for words and replying to them one by one
    void run_corrector(dict& dictionary) {
    std::string word;
    while (std::cin >> word) {
    auto it = best_match(dictionary, word);
    if (it != dictionary.end()) {
    std::cout << it->first << ' ' << it->second << '\n';
    it->second++;
    } else {
    std::cout << "-\n";
    dictionary[word] = 1;
    }
    }
    }

    }

    int main(int argc, char* argv[]) {
    if (argc != 2) throw std::runtime_error("Filename not passed in");
    std::string filename = argv[1];
    std::ifstream filestream(filename);
    a8::dict dictionary = a8::make_dictionary(filestream);
    a8::run_corrector(dictionary);
    return 0;
    }