Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Copyright (C) 2017 by Akira TAMAMORI | |
# Copyright (C) 2016 by hardmaru | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import random as rand | |
import datetime | |
class QLearner(object): | |
def __init__(self, \ | |
num_states=100, \ | |
num_actions = 4, \ | |
alpha = 0.2, \ | |
gamma = 0.9, \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See: Hochreiter, S., & Schmidhuber, J. (1996). | |
# Bridging long time lags by weight guessing and "Long Short-Term Memory". | |
# Spatiotemporal models in biological and artificial systems, 37, 65-72. | |
import numpy as np | |
import torch | |
from torch import nn | |
from torch.nn import functional as F | |
from torch.autograd import Variable | |
import sys |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
def gumbel_ST(logits, temp=1.0, hard=False): | |
eps = 1e-8 | |
gumbel_noise = -tf.log(-tf.log(tf.random_uniform(tf.shape(logits)) + eps) | |
+ eps) | |
y = tf.nn.softmax((logits + gumbel_noise) / temp) | |
if hard: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author: Mathieu Blondel | |
# License: BSD 3 clause | |
import numpy as np | |
def projection_simplex(V, z=1, axis=None): | |
""" | |
Projection of x onto the simplex, scaled by z: | |
P(x; z) = argmin_{y >= 0, sum(y) = z} ||y - x||^2 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def policyIterSP(game): | |
nnet = initNNet() # initialise random neural network | |
examples = [] | |
for i in range(numIters): | |
for e in range(numEps): | |
examples += executeEpisode(game, nnet) # collect examples from this game | |
new_nnet = trainNNet(examples) | |
frac_win = pit(new_nnet, nnet) # compare new net with previous net | |
if frac_win > threshold: | |
nnet = new_nnet # replace with new net |