Last active
January 10, 2018 20:19
-
-
Save petuhovskiy/11b3e9c3b2cec27a22022cf3bf607e66 to your computer and use it in GitHub Desktop.
My Markov Chains generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <array> | |
#include <iostream> | |
#include <map> | |
#include <string> | |
#include <utility> | |
using namespace std; | |
using PrevArr = vector<wchar_t>; | |
wchar_t generateNext( | |
const PrevArr& arr, | |
const map<PrevArr, map<int, wchar_t>>& next, | |
const map<PrevArr, int>& totalCnt, | |
mt19937& rnd | |
) { | |
int total = totalCnt.at(arr); | |
if (total == 0) { | |
wcerr << wstring(arr.begin(), arr.end()) << " => 0\n"; | |
return 0; | |
} | |
const auto& it = next.at(arr).lower_bound(rnd() % total + 1); | |
// assert(it != next.at(arr).end()); | |
return it->second; | |
} | |
wstring readAll() { | |
wstring result; | |
wchar_t c; | |
while (wcin.get(c)) { | |
result += c; | |
} | |
return result; | |
} | |
int main(int argc, const char *argv[]) { | |
int n = 3; | |
if (argc > 1) { | |
n = std::atoi(argv[1]); | |
} | |
cerr << "N = " << n << "\n"; | |
wstring base = readAll(); | |
random_device rd; | |
mt19937 gen(rd()); | |
cerr << "All read.\n"; | |
map<PrevArr, map<wchar_t, int>> freqs; | |
map<PrevArr, int> totalCnt; | |
for (size_t i = n; i != base.size(); ++i) { | |
PrevArr arr(base.begin() + i - n, base.begin() + i); | |
++freqs[arr][base[i]]; | |
++totalCnt[arr]; | |
} | |
cerr << "Freqs counted\n"; | |
map<PrevArr, map<int, wchar_t>> next; | |
for (const auto& pair : freqs) { | |
int cnt = 0; | |
auto& nextMap = next[pair.first]; | |
for (const auto& freq : pair.second) { | |
cnt += freq.second; | |
nextMap[cnt] = freq.first; | |
} | |
} | |
cerr << "Next counted\n"; | |
int pos = gen() % (base.size() - n) + n; | |
PrevArr arr(base.begin() + pos - n, base.begin() + pos); | |
for (wchar_t c : arr) { | |
wcout << c; | |
} | |
wchar_t nextChar; | |
int steps = 0; | |
while ((nextChar = generateNext(arr, next, totalCnt, gen)) != 0 && ++steps < 10000) { | |
wcout << nextChar; | |
rotate(arr.begin(), arr.begin() + 1, arr.end()); | |
arr.back() = nextChar; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment