Takashi Nagata nagataka

たまに色々と忘れるので便利な資料をメモ

The repository is organized as follows:

src : Contains the source code for all .... The source code is written in Python and takes advantage of NumPy and Matplotlib. To run a simulation, use the file run_xxxx.py.
tools: In this folder you can find some tools for.... With yyy.py you can reproduce the figures found in ().
data: All the results are saved here once you run a simulation.
params: Here you can find the configuration files containing all the parameters (one for each experiment).

	# OpenAI gym custom environment mimicking Blocking Maze
	# See Sutton and Barto, "Reinforcement Learning: An Introduction"
	# Example 8.2: Blocking Maze
	from enum import Enum
	import sys
	import copy

	import gym
	from gym import error, spaces, utils
	from gym.utils import seeding

	{
	"python.formatting.provider": "black",
	"python.linting.pylintEnabled": false,
	"python.linting.flake8Enabled": true,
	"python.linting.flake8Args": [
	"--ignore=E501,W503"
	],
	"python.sortImports.args": [
	"-m 3"
	],

	import random
	import numpy as np
	np.random.seed(0)

	def kerri(p, b):
	    """Return the Kelly-criterion bet fraction ``(p * (b + 1) - 1) / b``.

	    Reference: https://en.wikipedia.org/wiki/Kelly_criterion

	    In the terms of the linked article, ``p`` is the probability of a win
	    and ``b`` is the net odds received on a win (profit per unit staked).
	    The result is the fraction of the bankroll to wager; a negative value
	    means the bet has no edge.  A zero ``b`` divides by zero.
	    """
	    return (p * (b + 1) - 1) / b

	N = 300

	import gym
	import ray
	from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG

	import pprint as pp

	#tune.run(PPOTrainer, config={"env": "Breakout-v0", "use_pytorch": True})
	ray.init(num_gpus=1, ignore_reinit_error=True, log_to_driver=False)

	# https://github.com/ray-project/ray/blob/master/rllib/agents/ppo/ppo.py#L15

	#!/bin/bash

	set -eu

	### Incoming WebHooks URL
	WEBHOOKURL="https://hooks.slack.com/services/FILL_YOUR_WEBHOOKURL"

	### channel
	CHANNEL=${CHANNEL:-"#notifications"}

	"""A template to implement RL agent with OpenAI Gym

	Usage: python ./gym_template.py --env=CarRacing-v0 --algo=policy_gradient --epochs 1

	Algorithm implementations need to be in the ./algorithms/ directory; otherwise, change the following line to match your environment
	> algo = import_module('algorithms.'+args.algo)
	"""
	import argparse
	import numpy as np

	import gym
	import sys
	sys.path.append("reinforcement-learning/lib/envs")
	import gridworld
	import random
	import numpy as np
	import copy

	NUM_EPOCHS = 10000
	GAMMA = 1.0