Skip to content

Instantly share code, notes, and snippets.

View manuel-delverme's full-sized avatar
😀

Manuel manuel-delverme

😀
View GitHub Profile
import torch
import tensorboardX
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import torch.utils.data
writer = tensorboardX.SummaryWriter()
def train_network(samples, neural_network, nr_epochs=10, batch_size=64):
optimizer = optim.Adam(neural_network.parameters())
neural_network.train()
for epoch_nr in range(nr_epochs):
sample_ids = np.random.shuffle(range(len(samples)))
for start in range(0, len(samples) // batch_size, batch_size):
mini_batch = samples[sample_ids[start: start + batch_size]]
boards, pis, vs = zip(*mini_batch)
@manuel-delverme
manuel-delverme / zero.py
Last active June 11, 2018 14:31
zero code
environment = environments.GoEnvironment(board_size=19)
player_mcts = mcts.MCTS(
environment,
networks.NeuralNetwork(board_size=environment.getStateSize(), action_size=environment.getActionSize()),
)
training_samples = collections.deque(maxlen=opt.training_samples_buffer_size)
for iteration_number in range(opt.num_iters):
score: 19.46759259259259 options: nr:4 names:7 10 25 27
score: 11.435185185185185 options: nr:5 names:7 10 25 27 28
score: 25.23611111111111 options: nr:6 names:6 7 10 25 27 28
score: 18.35648148148148 options: nr:7 names:6 7 9 10 25 27 28
score: 23.541666666666668 options: nr:8 names:6 7 9 10 24 25 27 28
score: 23.38888888888889 options: nr:9 names:6 7 9 10 13 24 25 27 28
score: 8.62037037037037 options: nr:10 names:6 7 9 10 13 16 24 25 27 28
score: 24.101851851851848 options: nr:11 names:6 7 9 10 13 16 24 25 27 28 31
score: 18.96759259259259 options: nr:12 names:6 7 9 10 13 16 24 25 27 28 31 34
score: 17.833333333333332 options: nr:13 names:6 7 9 10 12 13 16 24 25 27 28 31 34
/home/awok/Projects/supervised_reward/env_reward/bin/python /home/awok/Projects/supervised_reward/main.py
(2_w,4mirr1)-aCMA-ES (mu_w=1.5,w_1=80%) in dimension 18 (seed=237288, Fri Jan 5 12:17:27 2018)
score: 7940.826388888889 options: 8 4 5 10 11 15 16 17 23
score: 5839.784722222223 options: 10 21 22 23 26 27 28 29 33 34 35
score: 8771.63888888889 options: 4 4 5 10 11
score: -1107.7361111111113 options: 14 0 1 2 3 4 6 7 8 9 10 13 14 15 16
[7940.826388888889, 5839.784722222223, 8771.63888888889, -1107.7361111111113]
best [-0.21687281 -0.29423262 0.10809115 -0.3457722 -0.18912326 0.17178892 0.14703262 0.94997003 -0.18883859 -0.82346577 0.50633336 -0.17325047 0.37087813 0.63369408 0.07967291 -0.47341161 -0.68896583 -0.4226999 ] fitness -8771.63888888889
score: 11841.0625 options: 3 29 34 35
score: 11929.47222222222 options: 4 28 29 34 35
# Micro-benchmark comparing three NumPy calls on a small array:
# a.reshape(-1, 10), a.transpose() and the a.T attribute.
import timeit

# Setup statement handed to every Timer; it creates the array `a` that the
# timed statements operate on.
setup = 'import numpy as np; a=np.random.randn(10)'

reshape = timeit.Timer('a.reshape(-1, 10)', setup=setup)
transp = timeit.Timer('a.transpose()', setup=setup)
T = timeit.Timer('a.T', setup=setup)

# Report order matters: "reshape" is measured a second time at the end,
# after the other two timers have run.
for _label, _timer in (
    ("reshape", reshape),
    ("transp", transp),
    ("T", T),
    ("reshape", reshape),
):
    # Each measurement runs the timed statement one million times.
    print(_label, _timer.timeit(number=int(1e6)))
gid WinLoss GameResult abs(deltaQ) last_turn Q_table_hitratio Q_table_misses
20147 0.9468599033816426 PlayState.WON 6 11 0.8437662644901822 1
20252 0.9471153846153847 PlayState.WON 0 12 0.8438516131687727 140
20280 0.9473684210526316 PlayState.WON 0 12 0.8441057721649726 9
20373 0.9476190476190477 PlayState.WON 22 11 0.8448002240477969 35
20554 0.9481132075471699 PlayState.WON 49 11 0.8436607834298462 452
20749 0.9483568075117372 PlayState.WON 28 12 0.8436584932582274 231
20818 0.9488372093023257 PlayState.WON 17 12 0.8437433943292638 68
20871 0.9490740740740742 PlayState.WON 32 11 0.843652254171913 91
21149 0.9493087557603688 PlayState.WON 17 12 0.8443073667385199 221
@manuel-delverme
manuel-delverme / printout.csv
Last active May 11, 2017 09:33
hearlstone0.1 output
games_played win % result abs_change turn
6632 0.6436972255729795 PlayState.LOST 52.7 31
6633 0.6436001809136137 PlayState.LOST 0 23
6634 0.6436539041302382 PlayState.WON 119.85 19
6635 0.6437076111529766 PlayState.WON 107.95 13
6636 0.64376130198915 PlayState.WON 0 21
6637 0.6436643061624228 PlayState.LOST 0 23
6638 0.6435673395601085 PlayState.LOST 0 22
6639 0.6434704021690013 PlayState.LOST 0 17
6640 0.6435240963855422 PlayState.WON 0 25
/usr/bin/python3.5 /home/awok/Documents/sapienza/s1/nlp/hw2/src/homework2.py model ../ ../resources
Using Theano backend.
WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10). Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29
ERROR (theano.sandbox.cuda): nvcc compiler not found on $PATH. Check your nvcc installation and try again.
model_output_path model
homework_dir: ../
model output: model
/usr/bin/python3.5 /home/awok/Documents/sapienza/s1/nlp/hw2/src/homework2.py model ../ ../resources
Using Theano backend.
model_output_path model
homework_dir: ../
model output: model
homework dir: ../
src dir: ../src/
data dir: ../data/
WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10). Please switch to the gpuarray backend. You can get more information about how to switch at this URL: