Skip to content

Instantly share code, notes, and snippets.

@tsu-nera
tsu-nera / balancer.py
Created June 27, 2017 13:07
Q-LearningでGyroBoyの立ち上げ
import numpy as np
import ev3dev.ev3 as ev3
import random, os, time
import pickle
# qlearningAgents.py
# ------------------
## based on http://inst.eecs.berkeley.edu/~cs188/sp09/pacman.html
import gym
import numpy as np
from qlearning_answer import QLearningAgent
env = gym.make("CartPole-v0")
n_actions = env.action_space.n
def build_state(features):
    """Encode a sequence of numeric features as a single integer state id.

    Each feature is truncated to ``int`` and its decimal digits are
    concatenated, e.g. ``[1, 2, 3] -> 123``.  An empty feature sequence
    returns 0 (the original ``int("".join(...))`` raised ValueError on
    empty input).

    NOTE(review): negative feature values would inject ``-`` characters
    mid-string and break the final ``int()`` parse — callers are assumed
    to pass non-negative bucket indices; confirm against the discretizer.
    """
    digits = "".join(str(int(feature)) for feature in features)
    return int(digits) if digits else 0
@tsu-nera
tsu-nera / q_tic_tac_toe.py
Last active June 22, 2017 21:59
Q学習法
import gym
import numpy as np
import gym_tic_tac_toe
from math import floor
# Tic-tac-toe environment provided by the third-party gym_tic_tac_toe package.
env = gym.make('tic_tac_toe-v0')
n_states = 3 ** 9 # number of states (each of the 9 cells is empty / X / O)
n_actions = 9 # number of actions (one per board cell)
eM = 1000 # number of episodes used for evaluation
import numpy as np
import gym
from gym.spaces import Discrete, Box
# ================================================================
# Policies
# ================================================================
class DeterministicDiscreteActionLinearPolicy(object):
@tsu-nera
tsu-nera / gradient_descent.ipynb
Created June 18, 2017 06:54
Gradient Descent
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@tsu-nera
tsu-nera / mc_tic_tac_toe.py
Created June 14, 2017 10:55
モンテカルロ法でtic-tac-toe
import gym
import numpy as np
import gym_tic_tac_toe
import random
from math import floor
import matplotlib.pyplot as plt
def random_plus_middle_move(moves, p):
if ([p, 4] in moves):
m = [p, 4]
import gym
import numpy as np
from gym import wrappers
env = gym.make("FrozenLake8x8-v0")
env = wrappers.Monitor(env, '/tmp/frozenlake-experiment-2')
env.reset()
n_states = env.observation_space.n
n_actions = env.action_space.n
import gym
import numpy as np
from gym import wrappers
env = gym.make("FrozenLake-v0")
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-5')
env.reset();
n_states = env.observation_space.n
n_actions = env.action_space.n
import gym
import numpy as np
from gym import wrappers
env = gym.make("FrozenLake-v0")
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-3')
env.reset();
n_states = env.observation_space.n
n_actions = env.action_space.n
import gym
import numpy as np
from gym import wrappers
env = gym.make("FrozenLake-v0")
env = wrappers.Monitor(env, '/tmp/cartpole-experiment-2')
env.reset();
n_states = env.observation_space.n
n_actions = env.action_space.n