Created
April 2, 2016 23:55
-
-
Save fluffy-critter/0fe944d41d182d134dd6e2d2f3b7d3b6 to your computer and use it in GitHub Desktop.
A really dumb genetic algorithm for optimizing hash coefficients
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <map> | |
#include <vector> | |
#include <string> | |
#include <algorithm> | |
#include <random> | |
#include <chrono> | |
#include <thread> | |
#include <mutex> | |
namespace { | |
auto rng = std::default_random_engine(std::chrono::system_clock::now().time_since_epoch().count()); | |
} | |
int h(const char* w, const char* c, int s) {unsigned t = 0, p = 0; while (*w) { t = c[p++ % s] * t + *w++; } return t % 16777213;} | |
struct Algo { | |
std::string coef; | |
Algo() {} | |
Algo(std::string c): coef(c) {} | |
bool operator<(const Algo& rhs) const { | |
return coef.size() < rhs.coef.size() | |
|| (coef.size() == rhs.coef.size() && coef < rhs.coef); | |
} | |
}; | |
int score(const std::vector<std::string>& words, const Algo& alg) | |
{ | |
std::map<int, int> shared; | |
for (auto w : words) { | |
shared[h(w.c_str(), alg.coef.c_str(), alg.coef.size())]++; | |
} | |
int count = 0; | |
for (auto c : shared) { | |
if ((c.first & 0xFFFFFF) != c.first) { std::cerr << "invalid hash: " << c.first << std::endl; } | |
if (c.second > 1) { count += c.second; } | |
} | |
return count; | |
} | |
Algo breed(const Algo& a1, const Algo& a2) | |
{ | |
std::string coef; | |
if ((rng() & 3) == 1) { | |
// concat | |
coef = a1.coef + a2.coef; | |
size_t len = std::min(coef.size(), size_t(56)); | |
if (len < coef.size()) { | |
coef = coef.substr(rng() % (coef.size() - len + 1), len); | |
} | |
} else { | |
// splice | |
const std::string& s1 = a1.coef, &s2 = a2.coef; | |
size_t sz = std::min(s1.size(), s2.size()); | |
// Find the codon substring | |
std::string c1, c2; | |
c1 = s1.substr(rng() % (s1.length() - sz + 1), sz); | |
c2 = s2.substr(rng() % (s2.length() - sz + 1), sz); | |
// Breed 'em | |
for (size_t k = 0; k < sz; k++) { | |
coef += ((rng() & 127) ? c1 : c2)[k]; | |
} | |
} | |
// Randomly swap some codons | |
for (int n = rng() % coef.size(); n >= 0; n--) { | |
std::swap(coef[rng() % coef.size()], coef[rng() % coef.size()]); | |
} | |
// Randomly mutate some codons | |
for (int n = rng() % coef.size(); n >= 0; n--) { | |
coef[rng() % coef.size()] ^= 1 << (rng() & 7); | |
} | |
// Ensure printability | |
for (char& c : coef) { | |
while (!isprint(c)) { | |
c ^= 1 << (rng() & 7); | |
} | |
} | |
return Algo(coef); | |
} | |
int main(void) | |
{ | |
std::map<int, int> shared; | |
std::vector<std::pair<int, Algo>> scores; | |
std::vector<std::string> words; | |
std::mutex scoreLock; | |
{ | |
std::string s; | |
while (std::cin >> s) { | |
words.push_back(s); | |
} | |
std::cout << "Loaded " << words.size() << " words" << std::endl; | |
} | |
{ | |
std::string seeds[] = {"%)+/15;=CGIOSYaegkmqy", // primes | |
"*", | |
"5euy", | |
"abcdefghijklmnopqrstuvwxyz", | |
"12489301940139041904190841", | |
"jnmrhfsxdluyinhsiobxgvpftcywtkjeLzemvpoaqgqbuaw", | |
"9YN", | |
"~kgkloK", | |
"Y9_", | |
"5/5;=CGI", | |
"*nt", | |
}; | |
for (auto s : seeds) { | |
Algo a(s); | |
int sc = score(words, a); | |
std::cout << s << " -> " << sc << std::endl; | |
scores.emplace_back(sc, a); | |
} | |
std::sort(scores.begin(), scores.end()); | |
} | |
std::cout << "Starting best score: '" << scores.front().second.coef << "' -> " << scores.front().first << std::endl; | |
std::vector<std::thread> threads; | |
for (int i = 0; i < std::thread::hardware_concurrency(); i++) { | |
threads.emplace_back([&]() { | |
for (;;) { | |
Algo a1, a2; | |
{ | |
std::lock_guard<std::mutex> lock(scoreLock); | |
size_t ofs = rng() % scores.size(); | |
a1 = scores[ofs].second; | |
a2 = scores[rand() % (ofs + 1)].second; | |
} | |
Algo a = breed(a1, a2); | |
if (a.coef.size()) { | |
int ns = score(words, a); | |
std::cout << ns << ' ' << a.coef.substr(0, 8) << '(' << a.coef.size() << ") \r" << std::flush; | |
{ | |
std::lock_guard<std::mutex> lock(scoreLock); | |
scores.emplace_back(ns, a); | |
if (ns < scores.front().first | |
|| (ns == scores.front().first && a.coef.size() < scores.front().second.coef.size())) { | |
std::cout << "*** New record: \"" << a.coef << "\" -> " << ns << std::endl; | |
} | |
std::sort(scores.begin(), scores.end()); | |
scores.resize(std::min(size_t(65535), scores.size())); | |
} | |
} | |
} | |
}); | |
} | |
for (;;) { | |
std::this_thread::sleep_for(std::chrono::seconds(5)); | |
{ | |
std::lock_guard<std::mutex> lock(scoreLock); | |
std::cout << "Current record: \"" << scores.front().second.coef << "\" -> " << scores.front().first << std::endl; | |
} | |
} | |
for (auto& t : threads) { | |
t.join(); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
for http://codegolf.stackexchange.com/questions/76781/tweetable-hash-function-challenge/76920#76920