Skip to content

Instantly share code, notes, and snippets.

View denisyarats's full-sized avatar

Denis Yarats denisyarats

View GitHub Profile
@denisyarats
denisyarats / on_policy_mc.py
Created January 15, 2017 05:32
on policy mc
#!/usr/local/bin/python
import argparse
import numpy as np
from collections import defaultdict
import gym
from gym import wrappers
import pdb
# Path prefix for this experiment's output — presumably the directory used by
# gym's Monitor wrapper for recordings/results; TODO confirm against the
# (not visible here) wrappers.Monitor call.
EXP_NAME_PREFIX = 'exp/on_policy_mc'
@denisyarats
denisyarats / gist:4981579b42c8a08e49206347f7d28c6c
Created January 17, 2017 07:16
./sarsa.py --max_episodes 10000 --alpha 0.3 --gamma 0.9 --eps 0.2 --eps_schedule 200 --goal 25 --env copy --upload
#!/usr/local/bin/python
"""
SARSA - on policy TD(0) learning.
Q(S, A) <- Q(S, A) + alpha * ((R + gamma * Q(S', A')) - Q(S, A))
A, A' ~ e-greedy from pi(A|S)
"""
import argparse
import numpy as np
#!/usr/local/bin/python
"""
Q-learning - off policy TD(0) learning.
Q(S, A) <- Q(S, A) + alpha * ((R + gamma * max(Q(S', A'))) - Q(S, A))
A ~ e-greedy from pi(A|S)
"""
import argparse
import numpy as np
#!/usr/local/bin/python
"""
Q-learning - off policy TD(0) learning.
Q(S, A) <- Q(S, A) + alpha * ((R + gamma * max(Q(S', A'))) - Q(S, A))
A ~ e-greedy from pi(A|S)
"""
import argparse
import numpy as np
@denisyarats
denisyarats / q_learning_lin_appr.py
Created January 24, 2017 07:06
q-learning with linear approximation
#!/usr/local/bin/python
"""
Q-learning with value fucntion approximation
"""
import argparse
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import argparse
import pdb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
#!/usr/bin/env python
"""
PyTorch implementation of DQN
Paper: https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf
"""
import argparse
import gym
from gym import wrappers
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init
from torch.autograd import Variable
from models.utils import *
class LayerNormGRUCell(nn.GRUCell):
def __init__(self, input_size, hidden_size, bias=True):
import dmc2gym
import numpy as np
import gym
import sys
seed = int(sys.argv[1])
env = dmc2gym.make(
'point_mass',
'easy',
seed,
import dmc2gym
import numpy as np
import gym
import sys
seed = int(sys.argv[1])
env = dmc2gym.make(
'point_mass',
'easy',
seed,