Skip to content

Instantly share code, notes, and snippets.

@wallabra
Last active March 1, 2017 00:04
Show Gist options
  • Save wallabra/81ae18cb05909b15d9460542e46d0e9e to your computer and use it in GitHub Desktop.
Save wallabra/81ae18cb05909b15d9460542e46d0e9e to your computer and use it in GitHub Desktop.
Failed attempt at some Cyberspeare
import cPickle as pk
import random
import re
import difflib
from pybrain.tools.shortcuts import buildNetwork as new_net
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure import SoftmaxLayer
# Alphabet the model works with: the corpus is filtered down to these
# characters, and each character is encoded by its position in this string.
ic = "abcdefghijklmnopqrstuvwxyz-_!?,.;:@#$ "
# Number of seed strings generated for each seed pool.
num_seeds = 150
# Number of (seed, target) samples added to the training dataset.
num_samples = 80
# Length of a seed string; also used as the dataset/network input dimension.
seed_size = 250
# Length of a target string; also used as the dataset/network output dimension.
result_size = 85
# Number of passes made by the training loop.
iterations = 400
def random_segment(s, size):
    """Return a randomly positioned slice of ``s`` that is ``size`` items long.

    Every valid start offset can be chosen, including the last one
    (``len(s) - size``), and asking for the whole sequence
    (``size == len(s)``) returns it unchanged.

    Args:
        s: sequence (e.g. a string) to cut the segment from.
        size: length of the segment to return.

    Returns:
        ``s[pos:pos + size]`` for a uniformly chosen start offset ``pos``.

    Raises:
        ValueError: if ``size`` is larger than ``len(s)``.
    """
    last_start = len(s) - size
    if last_start < 0:
        raise ValueError(
            "segment size {} exceeds sequence length {}".format(size, len(s))
        )
    # random.randint is inclusive on both ends, so the upper bound is the
    # last valid start offset itself (not one less).
    pos = random.randint(0, last_start)
    return s[pos:pos + size]
def cap(s, min_len, default=" "):
    """Right-pad ``s`` with copies of ``default`` up to ``min_len``.

    One copy of ``default`` is appended for every item ``s`` is short of
    ``min_len``; an ``s`` that is already long enough comes back with
    nothing appended.
    """
    shortfall = min_len - len(s)
    if shortfall < 0:
        shortfall = 0
    padding = default * shortfall
    return s + padding
def tesselate(l, size, sep=" ", fill=" "):
    """Build a string of random words from ``l`` padded out to ``size``.

    Words are drawn with ``random.choice`` and joined with ``sep`` until the
    joined text would be at least ``size`` long. The last word drawn (the one
    that reached or passed ``size``) is dropped so the text stays shorter
    than ``size``, and the remainder is padded with ``fill`` via ``cap``.

    The sequence of ``random.choice`` calls is the same as before, so a given
    random state yields the same result; only the bookkeeping changed (a
    running length instead of re-joining the whole list on every iteration).

    Args:
        l: non-empty sequence of word strings to draw from.
        size: target length of the returned string.
        sep: separator placed between words.
        fill: padding appended to reach ``size``.

    Returns:
        The joined words followed by padding.

    Raises:
        ValueError: if ``sep`` and every word in ``l`` are empty, because
            the text could then never grow to ``size``.
        IndexError: if ``l`` is empty and ``size`` is positive (raised by
            ``random.choice``).
    """
    if size > 0 and l and not sep and not any(l):
        # Previously this combination looped forever.
        raise ValueError(
            "cannot reach the requested size: separator and all words are empty"
        )

    picked = []
    joined_len = 0  # invariant: joined_len == len(sep.join(picked))
    while joined_len < size:
        if picked:
            joined_len += len(sep)
        word = random.choice(l)
        picked.append(word)
        joined_len += len(word)

    # Drop the word that reached or passed the limit so the text fits in size.
    a = sep.join(picked[:-1])
    return cap(a, size, fill)
def character_pos(c, alphabet=None):
    """Encode a single character as a float in ``[-1.0, 1.0)``.

    The character at index ``i`` of an alphabet of length ``n`` is mapped to
    ``i / n * 2 - 1``, so the first character becomes ``-1.0`` and later
    characters are spread evenly towards (but never reaching) ``1.0``.

    Args:
        c: the character to encode.
        alphabet: sequence of known characters; defaults to the module-level
            alphabet ``ic`` when omitted.

    Returns:
        The float encoding of ``c``.

    Raises:
        ValueError: if ``c`` is not an element of the alphabet.
    """
    if alphabet is None:
        alphabet = ic
    # list(...).index matches whole elements only, so a multi-character or
    # empty ``c`` is rejected instead of being matched as a substring.
    index = list(alphabet).index(c)
    fraction = float(index) / float(len(alphabet))
    return fraction * 2.0 - 1.0
def de_character_pos(f, alphabet=None):
    """Decode a float back into the alphabet character it is closest to.

    This inverts the ``i / n * 2 - 1`` encoding: ``f`` is rescaled to an
    index and rounded to the nearest integer. Rounding (rather than
    truncating) matters because encoded values sit exactly on an index, so a
    tiny floating-point error below it used to decode as the previous
    character.

    Args:
        f: value to decode, nominally in ``[-1.0, 1.0)``.
        alphabet: sequence of known characters; defaults to the module-level
            alphabet ``ic`` when omitted.

    Returns:
        The decoded character, or ``""`` when ``f`` maps outside the
        alphabet on either side. Negative indices are rejected instead of
        wrapping around to the end of the alphabet.
    """
    if alphabet is None:
        alphabet = ic
    index = int(round(((f + 1.0) / 2.0) * len(alphabet)))
    if 0 <= index < len(alphabet):
        return alphabet[index]
    return ""
def char_map(s):
    """Encode ``s`` as a list of floats, one per character found in ``ic``.

    Characters that are not part of the alphabet ``ic`` are skipped, so the
    result can be shorter than ``s``.
    """
    encoded = []
    for symbol in s:
        if symbol not in ic:
            continue
        encoded.append(character_pos(symbol))
    return encoded
def de_char_map(s):
    """Decode an iterable of floats into a string, one character per value.

    Each value is decoded with ``de_character_pos`` and the pieces are
    concatenated in order.
    """
    pieces = []
    for value in s:
        pieces.append(de_character_pos(value))
    return "".join(pieces)
# NOTE: this script targets Python 2 (cPickle, xrange). The parenthesised
# single-argument print calls below behave exactly like the print statement.

# Self-check: encoding and then decoding a string must give the string back.
assert de_char_map(char_map("hello world!")) == "hello world!"

print("Setting up...")
# Normalise the corpus: lower-case it, collapse every whitespace run to one
# space, and drop all characters that are not in the alphabet `ic`.
with open('corpus.txt') as corpus_file:
    corpus_text = corpus_file.read()
chars = "".join([x for x in re.sub(r'\s+', ' ', corpus_text.lower()) if x in ic])
A = len(chars) # number of letters
_in = char_map(chars)
words = chars.split(" ")
# Fix: tesselate() requires a target length. Seeds are used as dataset
# inputs, whose dimension is seed_size, so that is the length to build.
seeds = [tesselate(words, seed_size) for _ in xrange(num_seeds)]
fs = [char_map(s) for s in seeds]
word_size = max(len(s) for s in words)
ds = SupervisedDataSet(seed_size, result_size)

print("Filling dataset...")
for _ in xrange(num_samples):
    sd = random.choice(fs)
    c = tesselate(words, result_size)
    print("[training] {} -> {}\n".format(de_char_map(sd), c))
    ds.addSample(sd, char_map(c))

# Reuse a previously saved network when one exists; otherwise build a new
# one with new_net (PyBrain's buildNetwork) using SoftmaxLayer as the hidden
# layer class.
# NOTE: unpickling can execute arbitrary code; only load a thisnet.pickle
# that this script produced.
try:
    # Binary mode keeps the pickle bytes intact on every platform.
    with open("thisnet.pickle", "rb") as net_file:
        net = pk.load(net_file)
except IOError:
    # `//` keeps the hidden-layer size an integer.
    net = new_net(seed_size, (len(ic) * seed_size + result_size) // 2, result_size, hiddenclass=SoftmaxLayer)
trainer = BackpropTrainer(net, ds)

print("Seeding...")
# From here on seeds/fs hold uniformly random character strings; they are
# only used to sample the network's output during training.
seeds = ["".join([random.choice(ic) for _ in xrange(seed_size)]) for _ in xrange(num_seeds)]
fs = [char_map(s) for s in seeds]

print("Training...")
# Train the model iteratively; draw a sample after every pass.
for i in xrange(iterations):
    # Trailing comma: Python 2 print without a newline, so the progress
    # marker, the dot and the sample end up on one line.
    print("{}:".format(i)),
    # presumably the training error for this pass -- confirm against the
    # PyBrain BackpropTrainer.train documentation
    diff = trainer.train()
    print("."),
    seed = random.choice(fs)
    print("{} -> {} ({}%)\n".format(de_char_map(seed), de_char_map(net.activate(seed)), 100.0 - diff * 100))

# NOTE(review): the original indentation was lost, so it is unclear whether
# the network was saved after every pass or once at the end; it is saved once
# here. Move this block into the loop if per-pass checkpoints are wanted.
with open("thisnet.pickle", "wb") as net_file:
    pk.dump(net, net_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment