Skip to content

Instantly share code, notes, and snippets.

@taotao54321
Created November 8, 2016 07:12
Show Gist options
  • Save taotao54321/7988dd798f4f0a843d2f2a7a56c8ce64 to your computer and use it in GitHub Desktop.
Save taotao54321/7988dd798f4f0a843d2f2a7a56c8ce64 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Just a handmade algorithm. NOT AI.
import sys
import gym
### 4x4 MAP ###
# SFFF
# FHFH
# FFFH
# HFFG
###############
### 8x8 MAP ###
# SFFFFFFF
# FFFFFFFF
# FFFHFFFF
# FFFFFHFF
# FFFHFFFF
# FHHFFFHF
# FHFFHFHF
# FFFHFFFG
###############
# Hand-written policy tables, one flat tuple per map, laid out to mirror the
# map grids above read row by row. The tuple is indexed by the observation
# the environment returns, and each entry is the action to take there:
#   0: Left, 1: Down, 2: Right, 3: Up
# -1 means no action is assigned to that state; get_action rejects it.
ACTIONS_MAP = {
    "4x4": (
        0, 3, 3, 3,
        0, -1, 0, -1,
        3, 1, 0, -1,
        -1, 2, 1, -1,
    ),
    "8x8": (
        # Because there is a turn limit, not every policy that eventually
        # reaches the goal is good enough.
        #
        # With this table the average reward is about 0.71:
        #    3,  3,  3,  3,  3,  3,  3,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1, -1,
        #
        # With this table the average reward is about 0.88:
        #    2,  2,  2,  2,  2,  2,  2,  2,
        #    3,  3,  3,  3,  3,  3,  3,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1,  2,
        #   -1, -1, -1, -1, -1, -1, -1, -1,
        #
        # Active table: changed so that the first move always goes right.
        3, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 2,
        -1, -1, -1, -1, -1, -1, -1, 2,
        -1, -1, -1, -1, -1, -1, -1, 2,
        -1, -1, -1, -1, -1, -1, -1, 2,
        -1, -1, -1, -1, -1, -1, -1, 2,
        -1, -1, -1, -1, -1, -1, -1, 2,
        -1, -1, -1, -1, -1, -1, -1, -1,
    ),
}
# Set to True to have main() render the environment and print the result of
# every step.
DEBUG = False

# Map name given on the command line -> Gym environment id handed to
# gym.make().
ENVS = {
    "4x4": "FrozenLake-v0",
    "8x8": "FrozenLake8x8-v0",
}
def error(msg):
    """Terminate the program by raising SystemExit carrying *msg*."""
    raise SystemExit(msg)
def get_action(actions, ob):
    """Return the action the policy table assigns to observation *ob*.

    Args:
        actions: Indexable policy table; ``actions[ob]`` is the action for
            state ``ob``.
        ob: Observation (state index) used to look up the table.

    Returns:
        The action stored for ``ob``, an integer in the range 0..3.

    Raises:
        AssertionError: If the stored entry is outside 0..3, i.e. the table
            has no action for this state.
    """
    act = actions[ob]
    # Raise explicitly instead of using an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would let an invalid entry through.
    if not 0 <= act <= 3:
        raise AssertionError(
            "no valid action for state {} (table entry: {})".format(ob, act)
        )
    return act
def usage():
    """Exit with the command-line synopsis.

    The synopsis lists <test_count> because main() requires at least two
    arguments and reads the episode count from the second one.
    """
    error("Usage: FrozenLake-handmade <4x4|8x8> <test_count> [recdir]")
def main():
    """Run the hand-written policy for several episodes and report the reward.

    Command line: ``<4x4|8x8> <test_count> [recdir]``

    * ``<4x4|8x8>``   selects the environment (ENVS) and policy (ACTIONS_MAP).
    * ``<test_count>`` is the number of episodes to run; must be a positive
      integer.
    * ``[recdir]``     if given, the environment monitor is started with this
      directory and closed when the run ends.

    Prints the episode count, the total reward and the average reward.
    Exits through usage()/error() on invalid arguments.
    """
    if len(sys.argv) < 3:
        usage()

    map_name = sys.argv[1]
    # An unknown map name is a usage error, not a KeyError traceback.
    if map_name not in ENVS or map_name not in ACTIONS_MAP:
        usage()
    env_name = ENVS[map_name]
    actions = ACTIONS_MAP[map_name]

    try:
        test_count = int(sys.argv[2])
    except ValueError:
        error("test_count must be an integer: {!r}".format(sys.argv[2]))
    if test_count <= 0:
        # Zero episodes would divide by zero when computing the average.
        error("test_count must be positive: {}".format(test_count))

    rec_dir = sys.argv[3] if len(sys.argv) >= 4 else None

    print("# <{}>".format(env_name))
    env = gym.make(env_name)
    # NOTE(review): spec.timestep_limit and env.monitor are used exactly as in
    # the original script; confirm they exist in the installed gym version.
    step_max = env.spec.timestep_limit
    print("# step-max: {}".format(step_max))

    if rec_dir:
        env.monitor.start(rec_dir)
    reward_total = 0.0
    try:
        for _ in range(test_count):
            ob = env.reset()
            if DEBUG:
                env.render()
            for _ in range(step_max):
                ob, reward, done, info = env.step(get_action(actions, ob))
                if DEBUG:
                    env.render()
                    print(ob, reward, done, info)
                if done:
                    # Only the reward of the terminating step is counted.
                    reward_total += reward
                    break
    finally:
        # Close the monitor even if an episode raised.
        if rec_dir:
            env.monitor.close()

    print("episodes: {}".format(test_count))
    print("total reward: {}".format(reward_total))
    print("average reward: {:.2f}".format(reward_total / test_count))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment