Created
April 6, 2014 05:50
-
-
Save spaghetti-source/10002001 to your computer and use it in GitHub Desktop.
cryptan
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <vector> | |
#include <deque> | |
#include <cstdio> | |
#include <cstdlib> | |
#include <map> | |
#include <cmath> | |
#include <cstring> | |
#include <functional> | |
#include <algorithm> | |
#include <unordered_map> | |
using namespace std; | |
#define fst first | |
#define snd second | |
double logP[0x100][0x100]; | |
int freq[0x100][0x101]; | |
void make_table(const char *file) { | |
FILE *fp = fopen(file, "r"); | |
for (char s[1024]; fgets(s, sizeof(s), fp); ) { | |
size_t n = strlen(s); | |
char p = ' '; | |
for (size_t i = 0; i < n; ++i) { | |
char c = s[i]; | |
if (0 <= c && c < 0x100 && 0 <= p && p < 0x100) { | |
++freq[p][c]; | |
++freq[p][0x100]; | |
} | |
p = c; | |
} | |
} | |
// additive smoothing | |
for (size_t i = 0; i < 0x100; ++i) { | |
for (size_t j = 0; j < 0x100; ++j) { | |
++freq[i][j]; | |
++freq[i][0x100]; | |
} | |
} | |
for (size_t i = 0; i < 0x100; ++i) { | |
for (size_t j = 0; j < 0x100; ++j) { | |
logP[i][j] = log(double(freq[i][j])) - log(double(freq[i][0x100])); | |
} | |
} | |
} | |
double loglikelihood(int s[], int n, vector<int> &table) { | |
double L = 0; | |
int p = ' '; | |
for (size_t i = 0; i < n; ++i) { | |
L += logP[table[p]][table[s[i]]]; | |
p = s[i]; | |
} | |
return L; | |
} | |
// greedy matching | |
vector<int> initial_solution(int s[], int n) { | |
vector<int> table(0x100); | |
for (size_t i = 0; i < 0x100; ++i) | |
table[i] = i; | |
unordered_map<int,int> f; | |
for (size_t i = 0; i < n; ++i) | |
++f[s[i]]; | |
vector<pair<int,int>> g(f.begin(), f.end()); | |
sort(g.begin(), g.end(), | |
[](pair<int,int> a, pair<int,int> b) { return a.snd > b.snd; }); | |
vector<pair<double,int>> h; | |
for (size_t i = 0; i < 0x100; ++i) | |
h.push_back(make_pair(freq[i][0x100], i)); | |
sort(h.rbegin(), h.rend()); | |
for (size_t i = 0; i < g.size(); ++i) { | |
printf("%02X -> %c\n", g[i].fst, h[i].snd); | |
swap(table[g[i].fst], table[h[i].snd]); | |
} | |
return table; | |
} | |
void cryptan1(const char *file) { | |
FILE *fp = fopen(file, "r"); | |
size_t n = 0; | |
int s[10000]; | |
while (~fscanf(fp, "%x", &s[n++])); | |
--n; | |
// used for pruning | |
int appear[0x100]; | |
for (size_t i = 0; i < n; ++i) | |
appear[s[i]] = 1; | |
// improve solution by bigram tabu search | |
vector<int> table = initial_solution(s, n); | |
double S = loglikelihood(s, n, table); | |
double best_S = S; | |
vector<int> best_table = table; | |
deque< vector<int> > history; | |
history.push_front(table); | |
size_t maxiter = 10000; | |
size_t w = 100, h = 10; | |
for (size_t iter = 0; iter < maxiter; ++iter) { | |
typedef pair<double, vector<int>> node; | |
vector<node> nbh; | |
while (nbh.size() < w) { | |
int i, j; | |
while (1) { | |
i = rand() % 0x100; | |
if (appear[i] == 0) continue; | |
j = rand() % 0x100; | |
if (table[j] < 31 || table[j] > 121) continue; | |
break; | |
} | |
swap(table[i], table[j]); | |
double T = loglikelihood(s, n, table); | |
bool in_history = false; | |
for (auto &t: history) | |
if (t == table) in_history = true; | |
if (!in_history) | |
nbh.push_back(node(T, table)); | |
swap(table[i], table[j]); | |
} | |
sort(nbh.rbegin(), nbh.rend()); | |
S = nbh[0].fst; | |
table = nbh[0].snd; | |
history.push_front(table); | |
while (history.size() > h) | |
history.pop_back(); | |
if (S > best_S) { | |
best_S = S; | |
best_table = table; | |
} | |
fprintf(stdout, "%zd / %zd; S = %f, bestS = %f\r", iter, maxiter, S, best_S); | |
if (iter % 100 == 0) { | |
printf("\n"); | |
for (size_t i = 0; i < n; ++i) { | |
printf("%c", best_table[s[i]]); | |
} | |
printf("\n"); | |
} | |
} | |
} | |
int main(int argc, char *argv[]) { | |
make_table(argv[1]); | |
cryptan1("cryptan1.txt"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment