{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Q-learning first attempt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Optionally install what you need:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import sys\n",
    "# !{sys.executable} -m pip install seaborn gym-retro Pillow keras tensorflow opencv-python pandas matplotlib scipy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First we set up all of the imports the project needs; there are a bunch:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
"outputs": [ | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-2-5b9450a8a2b2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mpp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPrettyPrinter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mcv2\u001b[0m \u001b[0;31m#OpenCV\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mgym_remote\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclient\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mgrc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Development/Bobcats/reverie_agent/reverie_agent/lib/python3.6/site-packages/cv2/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mimportlib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mcv2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mdata\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"import random\n", | |
"import math\n", | |
"import retro\n", | |
"from PIL import Image\n", | |
"import gym\n", | |
"import pickle\n", | |
"import operator\n", | |
"import pprint\n", | |
"pp = pprint.PrettyPrinter(indent=4)\n", | |
"import numpy as np\n", | |
"import cv2 #OpenCV\n", | |
"import time\n", | |
"import gym_remote.client as grc\n", | |
"import gym_remote.exceptions as gre\n", | |
"import os\n", | |
"import json\n", | |
"import pandas as pd\n", | |
"from IPython.display import clear_output\n", | |
"from collections import deque\n", | |
"from matplotlib import pyplot as plt\n", | |
"plt.rcParams['figure.figsize'] = (30, 30)\n", | |
"import seaborn as sns\n", | |
"\n", | |
"from keras.initializers import normal, identity\n", | |
"from keras.models import model_from_json\n", | |
"from keras.models import Sequential\n", | |
"from keras.layers.core import Dense, Dropout, Activation, Flatten\n", | |
"from keras.layers.convolutional import Conv2D, MaxPooling2D\n", | |
"from keras.optimizers import SGD , Adam\n", | |
"import tensorflow as tf\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Helper functions handle disc IO, and help set the stage for recor keeping in a run. `show_img` was useful in rendering a frame of sonic, at various stages in the image processing. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# file system interactors\n", | |
"def save_obj(obj, name ):\n", | |
" with open(name + '.pkl', 'wb') as f: #dump files into objects folder\n", | |
" pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)\n", | |
"\n", | |
"def load_obj(name ):\n", | |
" try:\n", | |
" with open(name + '.pkl', 'rb') as f:\n", | |
" return pickle.load(f)\n", | |
" except FileNotFoundError:\n", | |
" if name == 'epsilon':\n", | |
" return .7;\n", | |
" return []\n", | |
" else:\n", | |
" return []\n", | |
" \n", | |
"output_dir = './run-'+time.strftime(\"%Y%m%d-%H:%M\")+'/'\n", | |
"if not os.path.exists(output_dir):\n", | |
" os.makedirs(output_dir)\n", | |
"loss_file_path = output_dir+\"loss_df.csv\"\n", | |
"\n", | |
"#Intialize log structures from file if exists else create new#Intiali \n", | |
"loss_df = pd.read_csv(loss_file_path) if os.path.isfile(loss_file_path) else pd.DataFrame(columns =['loss'])\n" | |
] | |
}, | |
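  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick illustrative sketch of how the pickle helpers round-trip a value (the `demo_value` name is made up for this example; `main()` below caches `epsilon` the same way):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save_obj writes <name>.pkl; load_obj reads it back, falling back to 0.7\n",
    "# for 'epsilon' or [] for any other missing file.\n",
    "save_obj(0.05, output_dir + 'demo_value')       # hypothetical example value\n",
    "print(load_obj(output_dir + 'demo_value'))      # -> 0.05\n",
    "print(load_obj('does_not_exist'))               # -> []"
   ]
  },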
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_img(image, graphs=False):\n",
    "    \"\"\"\n",
    "    Show images in a new window\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        # print(image.shape)\n",
    "        # image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n",
    "        processed = process_img(image)\n",
    "        window_title = \"logs\" if graphs else \"game_play\"\n",
    "        cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)\n",
    "        cv2.moveWindow(window_title, 20, 20)\n",
    "        # imS = cv2.resize(screen, (800, 400))\n",
    "        # cv2.imshow(window_title, screen)\n",
    "        cv2.imshow(window_title, processed)\n",
    "        cv2.waitKey(5)\n",
    "        cv2.destroyAllWindows()\n",
    "        break\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set up a tracked environment similar to the jerk agent's, but with a bit of extra record keeping tacked on."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class TrackedEnv(gym.Wrapper):\n",
    "    \"\"\"\n",
    "    An environment that tracks the current trajectory and\n",
    "    the total number of timesteps ever taken.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env):\n",
    "        super(TrackedEnv, self).__init__(env)\n",
    "        self.action_history = []\n",
    "        self.reward_history = []\n",
    "        self.total_reward = 0\n",
    "        self.total_steps_ever = 0\n",
    "        record_file_path = output_dir+\"record.csv\"\n",
    "        self.record = pd.read_csv(record_file_path) if os.path.isfile(record_file_path) else pd.DataFrame(columns=['Timesteps', 'Total_Score'])\n",
    "        actions_file_path = output_dir+\"actions.csv\"\n",
    "        self.actions = pd.read_csv(actions_file_path) if os.path.isfile(actions_file_path) else pd.DataFrame(columns=['Action', 'Intention'])\n",
    "\n",
    "    def best_sequence(self):\n",
    "        \"\"\"\n",
    "        Get the prefix of the trajectory with the best\n",
    "        cumulative reward.\n",
    "        \"\"\"\n",
    "        max_cumulative = max(self.reward_history)\n",
    "        for i, rew in enumerate(self.reward_history):\n",
    "            if rew == max_cumulative:\n",
    "                return self.action_history[:i + 1]\n",
    "        raise RuntimeError('unreachable')\n",
    "\n",
    "    # pylint: disable=E0202\n",
    "    def reset(self, **kwargs):\n",
    "        self.action_history = []\n",
    "        self.reward_history = []\n",
    "        self.total_reward = 0\n",
    "        return self.env.reset(**kwargs)\n",
    "\n",
    "    def step(self, action):\n",
    "        self.total_steps_ever += 1\n",
    "        self.action_history.append(action.copy())\n",
    "        obs, rew, done, info = self.env.step(action)\n",
    "        if done:\n",
    "            data = pd.DataFrame({'Timesteps': [len(self.action_history)], 'Total_Score': [self.total_reward]})\n",
    "            self.record = self.record.append(data)\n",
    "            # self.record.loc[len(loss_df)] = score\n",
    "            # self.record.append([self.total_reward, len(self.action_history)])\n",
    "            print('rip')\n",
    "        self.total_reward += rew\n",
    "        self.reward_history.append(self.total_reward)\n",
    "        return obs, rew, done, info\n"
   ]
  },
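  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a small worked example of `best_sequence` (the numbers are made up): if `reward_history` holds the cumulative rewards `[1.0, 3.0, 2.5]`, the maximum (3.0) is first reached after the second action, so `best_sequence()` returns the first two entries of `action_history`."
   ]
  },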
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hyperparameters. Many of these are copied from the dino learning write-up and from the Flappy Bird paper referenced within."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "EXPLOIT_BIAS = 0.25\n",
    "TOTAL_TIMESTEPS = int(1e6)\n",
    "\n",
    "# game parameters\n",
    "ACTIONS = 8  # possible actions\n",
    "GAMMA = 0.99  # decay rate of past observations, original 0.99\n",
    "OBSERVATION = 200000.  # timesteps to observe before training\n",
    "EXPLORE = 200000  # frames over which to anneal epsilon\n",
    "FINAL_EPSILON = 0.0001  # final value of epsilon\n",
    "INITIAL_EPSILON = 0.1  # starting value of epsilon\n",
    "REPLAY_MEMORY = 100000  # number of previous transitions to remember\n",
    "BATCH = 32  # size of minibatch\n",
    "FRAMERATE = 4  # how often to render\n",
    "LEARNING_RATE = 1e-4\n",
    "img_rows, img_cols = 120, 84\n",
    "img_channels = 4  # we stack 4 frames"
   ]
  },
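  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, `GAMMA` is the discount factor in the Q-learning target used in the experience-replay loop of `main()` below: $target = r_t + \\gamma \\max_{a'} Q(s_{t+1}, a')$ for non-terminal steps, and just $r_t$ on terminal steps. `EXPLORE` sets the per-step epsilon decrement applied once training starts: $(0.1 - 0.0001) / 200000 \\approx 5 \\times 10^{-7}$ per frame."
   ]
  },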
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def buildmodel():\n",
    "    print(\"Now we build the model\")\n",
    "    model = Sequential()\n",
    "    model.add(Conv2D(32, (8, 8), strides=(4, 4), padding='same', input_shape=(img_cols, img_rows, img_channels)))  # 84x120x4 input\n",
    "    model.add(Activation('relu'))\n",
    "    model.add(Conv2D(64, (4, 4), strides=(2, 2), padding='same'))\n",
    "    model.add(Activation('relu'))\n",
    "    model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same'))\n",
    "    model.add(Activation('relu'))\n",
    "    model.add(Flatten())\n",
    "    model.add(Dense(512))\n",
    "    model.add(Activation('relu'))\n",
    "    model.add(Dense(ACTIONS))\n",
    "    adam = Adam(lr=LEARNING_RATE)\n",
    "    model.compile(loss='mean_squared_logarithmic_error', optimizer=adam)\n",
    "    print(\"We finished building the model\")\n",
    "    return model\n",
    "# buildmodel().summary()"
   ]
  },
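  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The final `Dense(ACTIONS)` layer gives one Q-value per action, so `model.predict(s_t)` on a single stacked state returns an array of shape `(1, 8)`; the greedy policy in `main()` simply takes `np.argmax` over it to pick an index into the action table below."
   ]
  },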
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The action table and the `random_move` function are how we actually act on the environment. `random_move` can also take a supplied action index, for example when the move was predicted by the model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Possible actions\n",
    "# [\"B\", \"A\", \"MODE\", \"START\", \"UP\", \"DOWN\", \"LEFT\", \"RIGHT\", \"C\", \"Y\", \"X\", \"Z\"]\n",
"actions = [\n", | |
" [True, False, False, False, False, False, False, False, False, False, False, False],\n", | |
" [True, False, False, False, False, False, False, True, False, False, False, False],\n", | |
" [False, False, False, False, False, False, False, True, False, False, False, False],\n", | |
" [False, False, False, False, False, True, False, False, False, False, False, False],\n", | |
" [True, False, False, False, False, True, False, False, False, False, False, False],\n", | |
" [False, False, False, False, False, False, False, False, False, False, False, False],\n", | |
" [False, False, False, False, False, False, True, False, False, False, False, False],\n", | |
" [True, False, False, False, False, False, True, False, False, False, False, False],\n", | |
"]" | |
] | |
}, | |
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_move(env, Choice=None):\n",
    "    done = False\n",
    "    if Choice is None:\n",
    "        Choice = random.randrange(len(actions))\n",
    "    # no info variable in contest environment\n",
    "    obs, rew, done, _ = env.step(actions[Choice])\n",
    "\n",
    "    return rew, done, obs, Choice\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`proccessObs` was used for image cropping and resizing, but was ultimately dropped."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def proccessObs(obs):\n",
    "    window_size_x = 180\n",
    "    window_size_y = 100\n",
    "    window_offset_x = 140  # int((320 - window_size_x)/2)\n",
    "    window_offset_y = 100  # int((224 - window_size_y)/2)\n",
    "    # lm = Image.fromarray(np.array(obs[window_offset_y:(window_offset_y+window_size_y), window_offset_x:(window_offset_x+window_size_x)]))\n",
    "    # lm.show()\n",
    "    # input(\"Press Enter to continue...\")\n",
    "    return obs[window_offset_y:(window_offset_y+window_size_y), window_offset_x:(window_offset_x+window_size_x)].flatten().tostring()\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`process_img` takes a screen from the environment and applies grayscale conversion, cropping, downscaling, and Canny edge detection."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_img(image):\n",
    "    # convert to grayscale and crop away part of the frame\n",
    "    height, width = image.shape[:2]\n",
    "    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)\n",
    "    # print(height)\n",
    "    # print(width)\n",
    "    image = image[round(height*.25):height, round(width*.25):width]  # img[y:y+h, x:x+w]\n",
    "    image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)\n",
    "    # height, width = image.shape[:2]\n",
    "    # print(height)\n",
    "    # print(width)\n",
    "    image = cv2.Canny(image, threshold1=100, threshold2=200)  # apply Canny edge detection\n",
    "    return image"
   ]
  },
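  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To see where `img_rows, img_cols = 120, 84` comes from: a Genesis frame is 224×320, cropping off the top and left quarters leaves 168×240, and halving that with `cv2.resize(..., fx=0.5, fy=0.5)` gives an 84×120 edge map, which matches the `(img_cols, img_rows)` input shape the network expects."
   ]
  },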
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(observe=False):\n",
    "    # Set up a new TrackedEnv that can keep track of total timesteps and store\n",
    "    # previous best solutions.\n",
    "    #\n",
    "    # env = grc.RemoteEnv('tmp/sock')\n",
    "    # env = TrackedEnv(env)\n",
    "\n",
    "    env = retro.make(game='SonicTheHedgehog-Genesis',\n",
    "                     state='GreenHillZone.Act1',\n",
    "                     scenario='contest',\n",
    "                     record=output_dir)\n",
    "    env = TrackedEnv(env)\n",
    "\n",
    "    # new_ep will keep track of whether a new episode should be started.\n",
    "    new_ep = True\n",
    "    # solutions is an array of successful gameplay sequences\n",
    "    solutions = []\n",
    "\n",
    "    model = buildmodel()\n",
    "    x_t = process_img(np.zeros((224, 320, 3), dtype=np.uint8))\n",
    "\n",
    "    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)\n",
    "    s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1], s_t.shape[2])  # 1x84x120x4\n",
    "\n",
    "    initial_state = s_t\n",
    "    if observe:\n",
    "        OBSERVE = 999999999  # we keep observing, never train\n",
    "        epsilon = FINAL_EPSILON\n",
    "        print(\"Now we load the weights\")\n",
    "        model.load_weights(\"model_final.h5\")\n",
    "        adam = Adam(lr=LEARNING_RATE)\n",
    "        model.compile(loss='mse', optimizer=adam)\n",
    "        print(\"Weights loaded successfully\")\n",
    "    else:  # we go to training mode\n",
    "        OBSERVE = OBSERVATION\n",
    "        epsilon = load_obj(\"epsilon\")\n",
    "        # model.load_weights(\"model_final.h5\")\n",
    "        adam = Adam(lr=LEARNING_RATE)\n",
    "        model.compile(loss='mse', optimizer=adam)\n",
    "\n",
    "    t = 0  # start of timesteps\n",
    "    D = deque()\n",
    "    while True:\n",
    "\n",
    "        loss = 0\n",
    "        Q_sa = 0\n",
    "        action_index = 0\n",
    "        r_t = 0  # reward at t\n",
    "        a_t = np.zeros([ACTIONS])  # action at t\n",
    "        if new_ep:\n",
    "            clear_output(wait=True)\n",
    "            print('%f%% done, reward: %f' % (env.total_steps_ever / 10000, env.record[\"Total_Score\"].mean()))\n",
    "\n",
    "            # if (solutions and\n",
    "            #         random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):\n",
    "            #     solutions = sorted(solutions, key=lambda x: np.mean(x[0]))\n",
    "            #     best_pair = solutions[-1]\n",
    "            #     new_rew = exploit(env, best_pair[1])\n",
    "            #     best_pair[0].append(new_rew)\n",
    "            #     print('replayed best with reward %f' % new_rew)\n",
    "            #     continue\n",
    "            # else:\n",
    "            env.reset()\n",
    "            new_ep = False\n",
    "        if random.random() <= epsilon:  # randomly explore an action\n",
    "            # print(\"----------Random Action----------\")\n",
    "            action_index = random.randrange(len(actions[:]))\n",
    "            env.actions.loc[len(env.actions)] = {'Action': action_index, 'Intention': 'Random'}\n",
    "\n",
    "        else:  # predict the output\n",
    "            # print(\"----------Predicted----------\")\n",
    "            q = model.predict(s_t)  # input a stack of 4 images, get the prediction\n",
    "            max_Q = np.argmax(q)  # choose the index with the maximum q value\n",
    "            action_index = max_Q\n",
    "            env.actions.loc[len(env.actions)] = {'Action': action_index, 'Intention': 'Predicted'}\n",
    "\n",
    "        # We reduce epsilon (the exploration parameter) gradually\n",
    "        if epsilon > FINAL_EPSILON and t > OBSERVE:\n",
    "            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE\n",
    "\n",
    "        # action_index = 2\n",
    "        # x_t1 ~ obs, terminal ~ done\n",
    "        # run the selected action and observe the next state and reward\n",
    "        # x_t1, r_t, terminal = game_state.get_state(a_t)\n",
    "        # print(t, 'doing action', action_index)\n",
    "        reward, done, obs, choice = random_move(env, Choice=action_index)\n",
    "        x_t1 = process_img(obs)\n",
    "        # show_img(obs)\n",
    "        # if t % FRAMERATE == 0:\n",
    "        #     env.render()\n",
    "        x_t1 = x_t1.reshape(1, x_t1.shape[0], x_t1.shape[1], 1)  # 1x84x120x1\n",
    "        s_t1 = np.append(x_t1, s_t[:, :, :, :3], axis=3)  # append the new frame to the input stack and drop the oldest one\n",
    "\n",
    "        D.append((s_t, action_index, reward, s_t1, done))\n",
    "\n",
    "        if len(D) > REPLAY_MEMORY:\n",
    "            D.popleft()\n",
    "\n",
    "        # only train if done observing\n",
    "        if t > OBSERVE:\n",
    "\n",
    "            # sample a minibatch to train on\n",
    "            minibatch = random.sample(D, BATCH)\n",
    "            inputs = np.zeros((BATCH, s_t.shape[1], s_t.shape[2], s_t.shape[3]))  # 32x84x120x4\n",
    "            targets = np.zeros((inputs.shape[0], ACTIONS))  # 32x8\n",
    "\n",
    "            # Now we do the experience replay\n",
    "            for i in range(0, len(minibatch)):\n",
    "                state_t = minibatch[i][0]  # 4D stack of images\n",
    "                action_t = minibatch[i][1]  # the action index\n",
    "                reward_t = minibatch[i][2]  # reward at state_t due to action_t\n",
    "                state_t1 = minibatch[i][3]  # next state\n",
    "                terminal = minibatch[i][4]  # whether the episode ended due to the action\n",
    "\n",
    "                inputs[i:i + 1] = state_t\n",
    "\n",
    "                targets[i] = model.predict(state_t)  # predicted q values\n",
    "                Q_sa = model.predict(state_t1)  # predict q values for the next step\n",
    "\n",
    "                if terminal:\n",
    "                    targets[i, action_t] = reward_t  # if terminated, the target is just the reward\n",
    "                else:\n",
    "                    targets[i, action_t] = reward_t + GAMMA * np.max(Q_sa)\n",
    "\n",
    "            loss += model.train_on_batch(inputs, targets)\n",
    "            loss_df.loc[len(loss_df)] = loss\n",
    "        s_t = initial_state if done else s_t1  # reset to the initial frame if the episode terminated\n",
    "        t = t + 1\n",
    "        # env.render()\n",
    "        # print(t, env.total_reward)\n",
    "        # print(env.record)\n",
    "        state = \"\"\n",
    "        if t <= OBSERVE:\n",
    "            state = \"observe\"\n",
    "        elif t > OBSERVE and t <= OBSERVE + EXPLORE:\n",
    "            state = \"explore\"\n",
    "        else:\n",
    "            state = \"train\"\n",
    "        if t % 100 == 0:\n",
    "            print(\"T\", t, \"/ STATE\", state, \"/ ε\", round(epsilon, 3), \"/ REWARD\", round(env.total_reward), \"/ Q_MAX \", np.max(Q_sa), \"/ Loss \", loss)\n",
    "\n",
    "        if done:\n",
    "            new_ep = True\n",
    "\n",
    "        if t % 1000 == 0:\n",
    "            model.save_weights(output_dir+\"model_weights.h5\", overwrite=True)\n",
    "            # save_obj(D, output_dir+\"D\")  # saving episodes\n",
    "            save_obj(t, output_dir+\"time\")  # caching time steps\n",
    "            save_obj(epsilon, output_dir+\"epsilon\")  # cache epsilon to avoid repeated randomness in actions\n",
    "            loss_df.to_csv(output_dir+\"loss_df.csv\", index=False)\n",
    "            env.record.to_csv(output_dir+\"records.csv\", index=False)\n",
    "            env.actions.to_csv(output_dir+\"actions.csv\", index=False)\n",
    "            with open(\"model.json\", \"w\") as outfile:\n",
    "                json.dump(model.to_json(), outfile)\n",
    "        if t > 1000000:\n",
    "            exit()\n",
    "        # rew, new_ep = move_n_learn(env, 1)\n",
    "        # if not new_ep and rew <= 0:\n",
    "        #     print('backtracking due to negative reward: %f' % rew)\n",
    "        #     _, new_ep = move_n_learn(env, 70, left=True)\n",
    "        # if new_ep:\n",
    "        #     solutions.append(([max(env.reward_history)], env.best_sequence()))"
   ]
  },
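  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With `OBSERVE = OBSERVATION = 200000` and `EXPLORE = 200000`, the loop above labels roughly the first 200k timesteps \"observe\" (the replay memory fills but no training happens), the next 200k \"explore\" (training runs while epsilon is annealed toward `FINAL_EPSILON`), and everything after that \"train\"."
   ]
  },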
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    try:\n",
    "        main()\n",
    "    except gre.GymRemoteError as exc:\n",
    "        print('exception', exc)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, there is a function that takes the hardcoded folder name of a previous run and shows the loss over time, the game-score progress, and the distribution of moves between random and model-predicted actions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_plots():\n",
    "    rundir = 'run-20180519-14:14'\n",
    "    fig, axs = plt.subplots(ncols=1, nrows=3, figsize=(15, 15))\n",
    "    axs[0].set_title('Loss')\n",
    "    axs[1].set_title('Game Score progress')\n",
    "    loss_df = pd.read_csv(\"./\"+rundir+\"/loss_df.csv\")\n",
    "    scores_df = pd.read_csv(\"./\"+rundir+\"/records.csv\")\n",
    "    actions_df = pd.read_csv(\"./\"+rundir+\"/actions.csv\")\n",
    "    actions_df['Action'] = actions_df['Action'].astype('float')\n",
    "    loss_df['loss'] = loss_df['loss'].astype('float')\n",
    "    loss_df.plot(use_index=True, ax=axs[0]).set_yscale('log')\n",
    "\n",
    "    sns.distplot(actions_df['Action'].loc[actions_df['Intention'] == 'Predicted'])\n",
    "    sns.distplot(actions_df['Action'].loc[actions_df['Intention'] == 'Random'])\n",
    "    scores_df.plot(ax=axs[1])\n",
    "    imgg = fig.canvas.draw()\n",
    "show_plots()"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Raw Cell Format",
  "kernelspec": {
   "display_name": "reverie_agent",
   "language": "python",
   "name": "reverie_agent"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
} |