Skip to content

Instantly share code, notes, and snippets.

@lirnli
Last active August 27, 2017 04:51
Show Gist options
  • Save lirnli/69a84071587f59d18386d6b2072bddaf to your computer and use it in GitHub Desktop.
Save lirnli/69a84071587f59d18386d6b2072bddaf to your computer and use it in GitHub Desktop.
Simple Deep Q learn for Copy-v0
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# OpenAI gym: Copy-v0 with Simple Q-learning \n",
"- Observation dim = [1] \n",
"- Action dim = [2,2,5]. Use encoding 0-19\n",
"- The rest is a standard Q learning with a memory relay\n",
"- Initial exploration is random. I found 100 episodes were not enough, and I kept using 1000 episodes."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2017-08-27T00:30:05.059532\n"
]
}
],
"source": [
"import datetime\n",
"print(datetime.datetime.now().isoformat())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"import numpy as np\n",
"import gym\n",
"from gym import wrappers"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"tf.reset_default_graph()\n",
"k_init = tf.contrib.layers.variance_scaling_initializer()\n",
"X = tf.placeholder(tf.int32,shape=[None,1])\n",
"hid1 = tf.reshape(tf.one_hot(X,6),[-1,6])\n",
"hid2 = tf.layers.dense(hid1,64,activation=tf.nn.elu,kernel_initializer=k_init)\n",
"hid3 = tf.layers.dense(hid2,128,activation=tf.nn.elu,kernel_initializer=k_init)\n",
"Q = tf.layers.dense(hid2,20)\n",
"action_ph = tf.placeholder(tf.int32,shape=[None,])\n",
"Q_action = tf.reduce_sum(Q*tf.one_hot(action_ph,20),axis=1,keep_dims=True)\n",
"Qexpected = tf.placeholder(tf.float32,shape=[None,1])\n",
"loss = tf.losses.mean_squared_error(Qexpected,Q_action)\n",
"optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)\n",
"train_op = optimizer.minimize(loss)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from collections import deque\n",
"class Memory(object):\n",
" def __init__(self,memory_size=10000):\n",
" self.memory = deque(maxlen=memory_size)\n",
" self.memory_size = memory_size\n",
" \n",
" def __len__(self):\n",
" return len(self.memory)\n",
" \n",
" def append(self,item):\n",
" self.memory.append(item)\n",
" \n",
" def sample_batch(self,batch_size=256):\n",
" idx = np.random.permutation(len(self.memory))[:batch_size]\n",
" return [self.memory[i] for i in idx]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:30:10,892] Making new env: Copy-v0\n",
"[2017-08-27 00:30:10,985] Clearing 18 monitor files from previous run (because force=True was provided)\n",
"[2017-08-27 00:30:10,988] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000000.json\n",
"[2017-08-27 00:30:11,094] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000001.json\n",
"[2017-08-27 00:30:11,099] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000008.json\n",
"[2017-08-27 00:30:11,114] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000027.json\n",
"[2017-08-27 00:30:11,140] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000064.json\n",
"[2017-08-27 00:30:11,173] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000125.json\n",
"[2017-08-27 00:30:11,227] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000216.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"iter 0, ep 0\r",
"iter 1, ep 0\r",
"iter 2, ep 0\r",
"iter 3, ep 1\r",
"iter 4, ep 1\r",
"iter 5, ep 1\r",
"iter 6, ep 1\r",
"iter 7, ep 1\r",
"iter 8, ep 2\r",
"iter 9, ep 2\r",
"iter 10, ep 3\r",
"iter 11, ep 4\r",
"iter 12, ep 5\r",
"iter 13, ep 6\r",
"iter 14, ep 6\r",
"iter 15, ep 7\r",
"iter 16, ep 7\r",
"iter 17, ep 8\r",
"iter 18, ep 9\r",
"iter 19, ep 9\r",
"iter 20, ep 9\r",
"iter 21, ep 10\r",
"iter 22, ep 10\r",
"iter 23, ep 10\r",
"iter 24, ep 10\r",
"iter 25, ep 11\r",
"iter 26, ep 12\r",
"iter 27, ep 12\r",
"iter 28, ep 12\r",
"iter 29, ep 12\r",
"iter 30, ep 12\r",
"iter 31, ep 12\r",
"iter 32, ep 13\r",
"iter 33, ep 13\r",
"iter 34, ep 13\r",
"iter 35, ep 13\r",
"iter 36, ep 14\r",
"iter 37, ep 14\r",
"iter 38, ep 14\r",
"iter 39, ep 14\r",
"iter 40, ep 14\r",
"iter 41, ep 14\r",
"iter 42, ep 14\r",
"iter 43, ep 15\r",
"iter 44, ep 16\r",
"iter 45, ep 16\r",
"iter 46, ep 17\r",
"iter 47, ep 17\r",
"iter 48, ep 17\r",
"iter 49, ep 17\r",
"iter 50, ep 18\r",
"iter 51, ep 18\r",
"iter 52, ep 19\r",
"iter 53, ep 19\r",
"iter 54, ep 19\r",
"iter 55, ep 19\r",
"iter 56, ep 19\r",
"iter 57, ep 19\r",
"iter 58, ep 19\r",
"iter 59, ep 19\r",
"iter 60, ep 19\r",
"iter 61, ep 20\r",
"iter 62, ep 20\r",
"iter 63, ep 21\r",
"iter 64, ep 21\r",
"iter 65, ep 22\r",
"iter 66, ep 22\r",
"iter 67, ep 23\r",
"iter 68, ep 23\r",
"iter 69, ep 23\r",
"iter 70, ep 24\r",
"iter 71, ep 24\r",
"iter 72, ep 25\r",
"iter 73, ep 25\r",
"iter 74, ep 26\r",
"iter 75, ep 26\r",
"iter 76, ep 26\r",
"iter 77, ep 27\r",
"iter 78, ep 27\r",
"iter 79, ep 27\r",
"iter 80, ep 27\r",
"iter 81, ep 27\r",
"iter 82, ep 27\r",
"iter 83, ep 27\r",
"iter 84, ep 27\r",
"iter 85, ep 28\r",
"iter 86, ep 28\r",
"iter 87, ep 29\r",
"iter 88, ep 30\r",
"iter 89, ep 30\r",
"iter 90, ep 31\r",
"iter 91, ep 32\r",
"iter 92, ep 32\r",
"iter 93, ep 32\r",
"iter 94, ep 33\r",
"iter 95, ep 34\r",
"iter 96, ep 34\r",
"iter 97, ep 35\r",
"iter 98, ep 35\r",
"iter 99, ep 35\r",
"iter 100, ep 35\r",
"iter 101, ep 35\r",
"iter 102, ep 36\r",
"iter 103, ep 36\r",
"iter 104, ep 36\r",
"iter 105, ep 36\r",
"iter 106, ep 36\r",
"iter 107, ep 36\r",
"iter 108, ep 37\r",
"iter 109, ep 38\r",
"iter 110, ep 39\r",
"iter 111, ep 40\r",
"iter 112, ep 40\r",
"iter 113, ep 40\r",
"iter 114, ep 41\r",
"iter 115, ep 41\r",
"iter 116, ep 42\r",
"iter 117, ep 42\r",
"iter 118, ep 43\r",
"iter 119, ep 44\r",
"iter 120, ep 44\r",
"iter 121, ep 44\r",
"iter 122, ep 45\r",
"iter 123, ep 45\r",
"iter 124, ep 45\r",
"iter 125, ep 46\r",
"iter 126, ep 46\r",
"iter 127, ep 46\r",
"iter 128, ep 46\r",
"iter 129, ep 46\r",
"iter 130, ep 46\r",
"iter 131, ep 47\r",
"iter 132, ep 47\r",
"iter 133, ep 47\r",
"iter 134, ep 47\r",
"iter 135, ep 48\r",
"iter 136, ep 49\r",
"iter 137, ep 50\r",
"iter 138, ep 51\r",
"iter 139, ep 51\r",
"iter 140, ep 51\r",
"iter 141, ep 51\r",
"iter 142, ep 51\r",
"iter 143, ep 51\r",
"iter 144, ep 52\r",
"iter 145, ep 52\r",
"iter 146, ep 52\r",
"iter 147, ep 53\r",
"iter 148, ep 53\r",
"iter 149, ep 53\r",
"iter 150, ep 53\r",
"iter 151, ep 53\r",
"iter 152, ep 53\r",
"iter 153, ep 53\r",
"iter 154, ep 53\r",
"iter 155, ep 54\r",
"iter 156, ep 54\r",
"iter 157, ep 54\r",
"iter 158, ep 54\r",
"iter 159, ep 54\r",
"iter 160, ep 54\r",
"iter 161, ep 55\r",
"iter 162, ep 55\r",
"iter 163, ep 55\r",
"iter 164, ep 56\r",
"iter 165, ep 57\r",
"iter 166, ep 57\r",
"iter 167, ep 58\r",
"iter 168, ep 58\r",
"iter 169, ep 58\r",
"iter 170, ep 58\r",
"iter 171, ep 58\r",
"iter 172, ep 58\r",
"iter 173, ep 58\r",
"iter 174, ep 59\r",
"iter 175, ep 60\r",
"iter 176, ep 61\r",
"iter 177, ep 62\r",
"iter 178, ep 62\r",
"iter 179, ep 62\r",
"iter 180, ep 63\r",
"iter 181, ep 63\r",
"iter 182, ep 64\r",
"iter 183, ep 64\r",
"iter 184, ep 65\r",
"iter 185, ep 66\r",
"iter 186, ep 67\r",
"iter 187, ep 67\r",
"iter 188, ep 68\r",
"iter 189, ep 69\r",
"iter 190, ep 70\r",
"iter 191, ep 70\r",
"iter 192, ep 71\r",
"iter 193, ep 71\r",
"iter 194, ep 72\r",
"iter 195, ep 72\r",
"iter 196, ep 73\r",
"iter 197, ep 74\r",
"iter 198, ep 75\r",
"iter 199, ep 76\r",
"iter 200, ep 77\r",
"iter 201, ep 78\r",
"iter 202, ep 78\r",
"iter 203, ep 78\r",
"iter 204, ep 79\r",
"iter 205, ep 80\r",
"iter 206, ep 80\r",
"iter 207, ep 80\r",
"iter 208, ep 81\r",
"iter 209, ep 82\r",
"iter 210, ep 82\r",
"iter 211, ep 82\r",
"iter 212, ep 82\r",
"iter 213, ep 82\r",
"iter 214, ep 83\r",
"iter 215, ep 83\r",
"iter 216, ep 83\r",
"iter 217, ep 83\r",
"iter 218, ep 83\r",
"iter 219, ep 83\r",
"iter 220, ep 83\r",
"iter 221, ep 84\r",
"iter 222, ep 85\r",
"iter 223, ep 86\r",
"iter 224, ep 87\r",
"iter 225, ep 87\r",
"iter 226, ep 87\r",
"iter 227, ep 87\r",
"iter 228, ep 88\r",
"iter 229, ep 88\r",
"iter 230, ep 88\r",
"iter 231, ep 89\r",
"iter 232, ep 89\r",
"iter 233, ep 89\r",
"iter 234, ep 90\r",
"iter 235, ep 91\r",
"iter 236, ep 91\r",
"iter 237, ep 92\r",
"iter 238, ep 92\r",
"iter 239, ep 92\r",
"iter 240, ep 92\r",
"iter 241, ep 93\r",
"iter 242, ep 93\r",
"iter 243, ep 94\r",
"iter 244, ep 95\r",
"iter 245, ep 95\r",
"iter 246, ep 96\r",
"iter 247, ep 97\r",
"iter 248, ep 97\r",
"iter 249, ep 97\r",
"iter 250, ep 97\r",
"iter 251, ep 97\r",
"iter 252, ep 97\r",
"iter 253, ep 97\r",
"iter 254, ep 98\r",
"iter 255, ep 99\r",
"iter 256, ep 100\r",
"iter 257, ep 101\r",
"iter 258, ep 101\r",
"iter 259, ep 101\r",
"iter 260, ep 102\r",
"iter 261, ep 102\r",
"iter 262, ep 102\r",
"iter 263, ep 102\r",
"iter 264, ep 103\r",
"iter 265, ep 103\r",
"iter 266, ep 103\r",
"iter 267, ep 103\r",
"iter 268, ep 104\r",
"iter 269, ep 104\r",
"iter 270, ep 104\r",
"iter 271, ep 104\r",
"iter 272, ep 104\r",
"iter 273, ep 104\r",
"iter 274, ep 105\r",
"iter 275, ep 105\r",
"iter 276, ep 105\r",
"iter 277, ep 105\r",
"iter 278, ep 106\r",
"iter 279, ep 106\r",
"iter 280, ep 106\r",
"iter 281, ep 107\r",
"iter 282, ep 107\r",
"iter 283, ep 107\r",
"iter 284, ep 107\r",
"iter 285, ep 107\r",
"iter 286, ep 108\r",
"iter 287, ep 109\r",
"iter 288, ep 109\r",
"iter 289, ep 109\r",
"iter 290, ep 109\r",
"iter 291, ep 110\r",
"iter 292, ep 111\r",
"iter 293, ep 111\r",
"iter 294, ep 111\r",
"iter 295, ep 111\r",
"iter 296, ep 112\r",
"iter 297, ep 112\r",
"iter 298, ep 112\r",
"iter 299, ep 112\r",
"iter 300, ep 112\r",
"iter 301, ep 113\r",
"iter 302, ep 114\r",
"iter 303, ep 114\r",
"iter 304, ep 114\r",
"iter 305, ep 115\r",
"iter 306, ep 115\r",
"iter 307, ep 116\r",
"iter 308, ep 117\r",
"iter 309, ep 117\r",
"iter 310, ep 117\r",
"iter 311, ep 117\r",
"iter 312, ep 117\r",
"iter 313, ep 118\r",
"iter 314, ep 119\r",
"iter 315, ep 120\r",
"iter 316, ep 120\r",
"iter 317, ep 121\r",
"iter 318, ep 122\r",
"iter 319, ep 122\r",
"iter 320, ep 123\r",
"iter 321, ep 124\r",
"iter 322, ep 124\r",
"iter 323, ep 125\r",
"iter 324, ep 126\r",
"iter 325, ep 126\r",
"iter 326, ep 126\r",
"iter 327, ep 126\r",
"iter 328, ep 126\r",
"iter 329, ep 126\r",
"iter 330, ep 126\r",
"iter 331, ep 126\r",
"iter 332, ep 127\r",
"iter 333, ep 127\r",
"iter 334, ep 127\r",
"iter 335, ep 128\r",
"iter 336, ep 129\r",
"iter 337, ep 130\r",
"iter 338, ep 131\r",
"iter 339, ep 131\r",
"iter 340, ep 131\r",
"iter 341, ep 132\r",
"iter 342, ep 132\r",
"iter 343, ep 133\r",
"iter 344, ep 133\r",
"iter 345, ep 133\r",
"iter 346, ep 133\r",
"iter 347, ep 134\r",
"iter 348, ep 135\r",
"iter 349, ep 135\r",
"iter 350, ep 136\r",
"iter 351, ep 137\r",
"iter 352, ep 137\r",
"iter 353, ep 137\r",
"iter 354, ep 138\r",
"iter 355, ep 139\r",
"iter 356, ep 140\r",
"iter 357, ep 140\r",
"iter 358, ep 140\r",
"iter 359, ep 140\r",
"iter 360, ep 141\r",
"iter 361, ep 141\r",
"iter 362, ep 141\r",
"iter 363, ep 141\r",
"iter 364, ep 142\r",
"iter 365, ep 142\r",
"iter 366, ep 143\r",
"iter 367, ep 144\r",
"iter 368, ep 145\r",
"iter 369, ep 145\r",
"iter 370, ep 145\r",
"iter 371, ep 146\r",
"iter 372, ep 146\r",
"iter 373, ep 147\r",
"iter 374, ep 147\r",
"iter 375, ep 148\r",
"iter 376, ep 149\r",
"iter 377, ep 150\r",
"iter 378, ep 150\r",
"iter 379, ep 151\r",
"iter 380, ep 151\r",
"iter 381, ep 151\r",
"iter 382, ep 151\r",
"iter 383, ep 151\r",
"iter 384, ep 151\r",
"iter 385, ep 151\r",
"iter 386, ep 151\r",
"iter 387, ep 151\r",
"iter 388, ep 151\r",
"iter 389, ep 152\r",
"iter 390, ep 152\r",
"iter 391, ep 153\r",
"iter 392, ep 154\r",
"iter 393, ep 155\r",
"iter 394, ep 155\r",
"iter 395, ep 155\r",
"iter 396, ep 155\r",
"iter 397, ep 155\r",
"iter 398, ep 155\r",
"iter 399, ep 156\r",
"iter 400, ep 157\r",
"iter 401, ep 157\r",
"iter 402, ep 158\r",
"iter 403, ep 159\r",
"iter 404, ep 159\r",
"iter 405, ep 160\r",
"iter 406, ep 161\r",
"iter 407, ep 162\r",
"iter 408, ep 163\r",
"iter 409, ep 163\r",
"iter 410, ep 163\r",
"iter 411, ep 163\r",
"iter 412, ep 163\r",
"iter 413, ep 164\r",
"iter 414, ep 165\r",
"iter 415, ep 166\r",
"iter 416, ep 167\r",
"iter 417, ep 167\r",
"iter 418, ep 168\r",
"iter 419, ep 169\r",
"iter 420, ep 169\r",
"iter 421, ep 169\r",
"iter 422, ep 170\r",
"iter 423, ep 171\r",
"iter 424, ep 171\r",
"iter 425, ep 171\r",
"iter 426, ep 172\r",
"iter 427, ep 173\r",
"iter 428, ep 174\r",
"iter 429, ep 175\r",
"iter 430, ep 175\r",
"iter 431, ep 175\r",
"iter 432, ep 175\r",
"iter 433, ep 175\r",
"iter 434, ep 176\r",
"iter 435, ep 176\r",
"iter 436, ep 177\r",
"iter 437, ep 178\r",
"iter 438, ep 179\r",
"iter 439, ep 179\r",
"iter 440, ep 179\r",
"iter 441, ep 180\r",
"iter 442, ep 180\r",
"iter 443, ep 180\r",
"iter 444, ep 181\r",
"iter 445, ep 181\r",
"iter 446, ep 181\r",
"iter 447, ep 181\r",
"iter 448, ep 182\r",
"iter 449, ep 183\r",
"iter 450, ep 183\r",
"iter 451, ep 184\r",
"iter 452, ep 185\r",
"iter 453, ep 186\r",
"iter 454, ep 186\r",
"iter 455, ep 186\r",
"iter 456, ep 186\r",
"iter 457, ep 186\r",
"iter 458, ep 187\r",
"iter 459, ep 187\r",
"iter 460, ep 188\r",
"iter 461, ep 189\r",
"iter 462, ep 189\r",
"iter 463, ep 189\r",
"iter 464, ep 190\r",
"iter 465, ep 191\r",
"iter 466, ep 191\r",
"iter 467, ep 191\r",
"iter 468, ep 191\r",
"iter 469, ep 191\r",
"iter 470, ep 191\r",
"iter 471, ep 191\r",
"iter 472, ep 192\r",
"iter 473, ep 192\r",
"iter 474, ep 192\r",
"iter 475, ep 193\r",
"iter 476, ep 193\r",
"iter 477, ep 193\r",
"iter 478, ep 193\r",
"iter 479, ep 193\r",
"iter 480, ep 194\r",
"iter 481, ep 194\r",
"iter 482, ep 195\r",
"iter 483, ep 196\r",
"iter 484, ep 196\r",
"iter 485, ep 196\r",
"iter 486, ep 196\r",
"iter 487, ep 196\r",
"iter 488, ep 197\r",
"iter 489, ep 197\r",
"iter 490, ep 198\r",
"iter 491, ep 198\r",
"iter 492, ep 198\r",
"iter 493, ep 199\r",
"iter 494, ep 199\r",
"iter 495, ep 199\r",
"iter 496, ep 200\r",
"iter 497, ep 200\r",
"iter 498, ep 201\r",
"iter 499, ep 202\r",
"iter 500, ep 202\r",
"iter 501, ep 203\r",
"iter 502, ep 203\r",
"iter 503, ep 203\r",
"iter 504, ep 204\r",
"iter 505, ep 205\r",
"iter 506, ep 205\r",
"iter 507, ep 205\r",
"iter 508, ep 205\r",
"iter 509, ep 206\r",
"iter 510, ep 206\r",
"iter 511, ep 206\r",
"iter 512, ep 206\r",
"iter 513, ep 206\r",
"iter 514, ep 207\r",
"iter 515, ep 207\r",
"iter 516, ep 208\r",
"iter 517, ep 208\r",
"iter 518, ep 209\r",
"iter 519, ep 209\r",
"iter 520, ep 210\r",
"iter 521, ep 210\r",
"iter 522, ep 210\r",
"iter 523, ep 211\r",
"iter 524, ep 211\r",
"iter 525, ep 211\r",
"iter 526, ep 211\r",
"iter 527, ep 211\r",
"iter 528, ep 211\r",
"iter 529, ep 211\r",
"iter 530, ep 211\r",
"iter 531, ep 211\r",
"iter 532, ep 212\r",
"iter 533, ep 213\r",
"iter 534, ep 213\r",
"iter 535, ep 214\r",
"iter 536, ep 214\r",
"iter 537, ep 215\r",
"iter 538, ep 216\r",
"iter 539, ep 217\r",
"iter 540, ep 218\r",
"iter 541, ep 219\r",
"iter 542, ep 219\r",
"iter 543, ep 219\r",
"iter 544, ep 220\r",
"iter 545, ep 220\r",
"iter 546, ep 220\r",
"iter 547, ep 220\r",
"iter 548, ep 220\r",
"iter 549, ep 220\r",
"iter 550, ep 220\r",
"iter 551, ep 220\r",
"iter 552, ep 221\r",
"iter 553, ep 221\r",
"iter 554, ep 222\r",
"iter 555, ep 222\r",
"iter 556, ep 223\r",
"iter 557, ep 223\r",
"iter 558, ep 223\r",
"iter 559, ep 223\r",
"iter 560, ep 224\r",
"iter 561, ep 225\r",
"iter 562, ep 226\r",
"iter 563, ep 226\r",
"iter 564, ep 226\r",
"iter 565, ep 226\r",
"iter 566, ep 227\r",
"iter 567, ep 228\r",
"iter 568, ep 228\r",
"iter 569, ep 229\r",
"iter 570, ep 229\r",
"iter 571, ep 229\r",
"iter 572, ep 230\r",
"iter 573, ep 230\r",
"iter 574, ep 231\r",
"iter 575, ep 231\r",
"iter 576, ep 232\r",
"iter 577, ep 232\r",
"iter 578, ep 233\r",
"iter 579, ep 234\r",
"iter 580, ep 234\r",
"iter 581, ep 235\r",
"iter 582, ep 235\r",
"iter 583, ep 236\r",
"iter 584, ep 237\r",
"iter 585, ep 237\r",
"iter 586, ep 237\r",
"iter 587, ep 237\r",
"iter 588, ep 237\r",
"iter 589, ep 237\r",
"iter 590, ep 237\r",
"iter 591, ep 237\r",
"iter 592, ep 237\r",
"iter 593, ep 238\r",
"iter 594, ep 238\r",
"iter 595, ep 239\r",
"iter 596, ep 239\r",
"iter 597, ep 240\r",
"iter 598, ep 240\r",
"iter 599, ep 241\r",
"iter 600, ep 242\r",
"iter 601, ep 242\r",
"iter 602, ep 243\r",
"iter 603, ep 243\r",
"iter 604, ep 244\r",
"iter 605, ep 244\r",
"iter 606, ep 244\r",
"iter 607, ep 244\r",
"iter 608, ep 245\r",
"iter 609, ep 245\r",
"iter 610, ep 246\r",
"iter 611, ep 247\r",
"iter 612, ep 247\r",
"iter 613, ep 248\r",
"iter 614, ep 249\r",
"iter 615, ep 250\r",
"iter 616, ep 251\r",
"iter 617, ep 251\r",
"iter 618, ep 251\r",
"iter 619, ep 251\r",
"iter 620, ep 251\r",
"iter 621, ep 251\r",
"iter 622, ep 252\r",
"iter 623, ep 252\r",
"iter 624, ep 252\r",
"iter 625, ep 253\r",
"iter 626, ep 254\r",
"iter 627, ep 254\r",
"iter 628, ep 255\r",
"iter 629, ep 256\r",
"iter 630, ep 257\r",
"iter 631, ep 257\r",
"iter 632, ep 258\r",
"iter 633, ep 259\r",
"iter 634, ep 259\r",
"iter 635, ep 260\r",
"iter 636, ep 261\r",
"iter 637, ep 261\r",
"iter 638, ep 262\r",
"iter 639, ep 263\r",
"iter 640, ep 264\r",
"iter 641, ep 265\r",
"iter 642, ep 266\r",
"iter 643, ep 266\r",
"iter 644, ep 267\r",
"iter 645, ep 267\r",
"iter 646, ep 268\r",
"iter 647, ep 269\r",
"iter 648, ep 269\r",
"iter 649, ep 270\r",
"iter 650, ep 270\r",
"iter 651, ep 270\r",
"iter 652, ep 270\r",
"iter 653, ep 271\r",
"iter 654, ep 271\r",
"iter 655, ep 272\r",
"iter 656, ep 272\r",
"iter 657, ep 272\r",
"iter 658, ep 272\r",
"iter 659, ep 273\r",
"iter 660, ep 273\r",
"iter 661, ep 273\r",
"iter 662, ep 273\r",
"iter 663, ep 273\r",
"iter 664, ep 273\r",
"iter 665, ep 274\r",
"iter 666, ep 275\r",
"iter 667, ep 275\r",
"iter 668, ep 275\r",
"iter 669, ep 275\r",
"iter 670, ep 276\r",
"iter 671, ep 276\r",
"iter 672, ep 276\r",
"iter 673, ep 276\r",
"iter 674, ep 277\r",
"iter 675, ep 278\r",
"iter 676, ep 278\r",
"iter 677, ep 278\r",
"iter 678, ep 279\r",
"iter 679, ep 279\r",
"iter 680, ep 280\r",
"iter 681, ep 280\r",
"iter 682, ep 281\r",
"iter 683, ep 281\r",
"iter 684, ep 281\r",
"iter 685, ep 282\r",
"iter 686, ep 283\r",
"iter 687, ep 283\r",
"iter 688, ep 284\r",
"iter 689, ep 284\r",
"iter 690, ep 284\r",
"iter 691, ep 284\r",
"iter 692, ep 285\r",
"iter 693, ep 285\r",
"iter 694, ep 286\r",
"iter 695, ep 286\r",
"iter 696, ep 286\r",
"iter 697, ep 286\r",
"iter 698, ep 286\r",
"iter 699, ep 286\r",
"iter 700, ep 287\r",
"iter 701, ep 287\r",
"iter 702, ep 288\r",
"iter 703, ep 288\r",
"iter 704, ep 288\r",
"iter 705, ep 289\r",
"iter 706, ep 290\r",
"iter 707, ep 290\r",
"iter 708, ep 291\r",
"iter 709, ep 292\r",
"iter 710, ep 292\r",
"iter 711, ep 293\r",
"iter 712, ep 293\r",
"iter 713, ep 294\r",
"iter 714, ep 294\r",
"iter 715, ep 295\r",
"iter 716, ep 295\r",
"iter 717, ep 295\r",
"iter 718, ep 295\r",
"iter 719, ep 296\r",
"iter 720, ep 297\r",
"iter 721, ep 298\r",
"iter 722, ep 299\r",
"iter 723, ep 299\r",
"iter 724, ep 299\r",
"iter 725, ep 299\r",
"iter 726, ep 299\r",
"iter 727, ep 299\r",
"iter 728, ep 300\r",
"iter 729, ep 300\r",
"iter 730, ep 301\r",
"iter 731, ep 301\r",
"iter 732, ep 301\r",
"iter 733, ep 301\r",
"iter 734, ep 301\r",
"iter 735, ep 301\r",
"iter 736, ep 302\r",
"iter 737, ep 303\r",
"iter 738, ep 303\r",
"iter 739, ep 304\r",
"iter 740, ep 304\r",
"iter 741, ep 304\r",
"iter 742, ep 304\r",
"iter 743, ep 304\r",
"iter 744, ep 305\r",
"iter 745, ep 306\r",
"iter 746, ep 306\r",
"iter 747, ep 307\r",
"iter 748, ep 307\r",
"iter 749, ep 308\r",
"iter 750, ep 308\r",
"iter 751, ep 308\r",
"iter 752, ep 309\r",
"iter 753, ep 310\r",
"iter 754, ep 310\r",
"iter 755, ep 310\r",
"iter 756, ep 311\r",
"iter 757, ep 312\r",
"iter 758, ep 312\r",
"iter 759, ep 313\r",
"iter 760, ep 313\r",
"iter 761, ep 313\r",
"iter 762, ep 313\r",
"iter 763, ep 313\r",
"iter 764, ep 314\r",
"iter 765, ep 315\r",
"iter 766, ep 316\r",
"iter 767, ep 316\r",
"iter 768, ep 316\r",
"iter 769, ep 316"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:30:11,433] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000343.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 1018, ep 400, ep reward 0.5, ep steps 8\n",
"iter 1361, ep 495"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:30:12,899] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000512.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 1383, ep 500, ep reward -0.5, ep steps 1\n",
"iter 1780, ep 600, ep reward -0.5, ep steps 6\n",
"iter 2130, ep 700, ep reward 0.5, ep steps 4\n",
"iter 2213, ep 722"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:30:14,838] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video000729.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 2530, ep 800, ep reward -0.5, ep steps 4\n",
"iter 2981, ep 900, ep reward -1.0, ep steps 11\n",
"iter 3410, ep 989"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:30:17,720] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video001000.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 3466, ep 1000, ep reward 3.0, ep steps 3\n",
"iter 3911, ep 1100, ep reward -0.5, ep steps 2\n",
"iter 4298, ep 1200, ep reward 0.5, ep steps 3\n",
"iter 4730, ep 1300, ep reward -1.0, ep steps 9\n",
"iter 5099, ep 1400, ep reward -0.5, ep steps 3\n",
"iter 5484, ep 1500, ep reward 0.5, ep steps 3\n",
"iter 5900, ep 1600, ep reward 4.0, ep steps 4\n",
"iter 6333, ep 1700, ep reward 0.5, ep steps 3\n",
"iter 6775, ep 1800, ep reward 2.0, ep steps 13\n",
"iter 7154, ep 1900, ep reward 5.0, ep steps 5\n",
"iter 7946, ep 1996"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:30:29,364] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video002000.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 7983, ep 2000, ep reward -0.5, ep steps 2\n",
"iter 8536, ep 2100, ep reward -0.5, ep steps 3\n",
"iter 8932, ep 2200, ep reward 5.5, ep steps 8\n",
"iter 9557, ep 2300, ep reward 14.0, ep steps 14\n",
"iter 11332, ep 2400, ep reward 24.0, ep steps 24\n",
"iter 13512, ep 2500, ep reward 0.5, ep steps 3\n",
"iter 14469, ep 2600, ep reward 4.5, ep steps 7\n",
"iter 16248, ep 2700, ep reward 31.0, ep steps 31\n",
"iter 19349, ep 2800, ep reward 32.0, ep steps 32\n",
"iter 22447, ep 2900, ep reward 32.0, ep steps 32\n",
"iter 25479, ep 2998"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:31:17,244] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video003000.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 25543, ep 3000, ep reward 32.0, ep steps 32\n",
"iter 28633, ep 3100, ep reward 32.0, ep steps 32\n",
"iter 31742, ep 3200, ep reward 32.0, ep steps 32\n",
"iter 34853, ep 3300, ep reward 30.0, ep steps 30\n",
"iter 37933, ep 3400, ep reward 30.0, ep steps 30\n",
"iter 41039, ep 3500, ep reward 31.0, ep steps 31\n",
"iter 44139, ep 3600, ep reward 32.0, ep steps 32\n",
"iter 47246, ep 3700, ep reward 32.0, ep steps 32\n",
"iter 50344, ep 3800, ep reward 31.0, ep steps 31\n",
"iter 53448, ep 3900, ep reward 31.0, ep steps 31\n",
"iter 56517, ep 3999"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:32:41,871] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video004000.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"iter 56556, ep 4000, ep reward 30.0, ep steps 30\n",
"iter 59677, ep 4100, ep reward 31.0, ep steps 31\n",
"iter 62776, ep 4200, ep reward 32.0, ep steps 32\n",
"iter 65881, ep 4300, ep reward 31.0, ep steps 31\n",
"iter 68980, ep 4400, ep reward 32.0, ep steps 32\n",
"iter 72094, ep 4500, ep reward 30.0, ep steps 30\n",
"iter 75179, ep 4600, ep reward 31.0, ep steps 31\n",
"iter 78282, ep 4700, ep reward 32.0, ep steps 32\n",
"iter 81372, ep 4800, ep reward 32.0, ep steps 32\n",
"iter 84463, ep 4900, ep reward 30.0, ep steps 30\n",
"iter 87530, ep 4999"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:34:07,657] Starting new video recorder writing to /Users/winter/Google Drive/handson-ml/tmp/openaigym.video.0.1673.video005000.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"iter 87531, ep 5000\r",
"iter 87532, ep 5000\r",
"iter 87533, ep 5000\r",
"iter 87534, ep 5000\r",
"iter 87535, ep 5000\r",
"iter 87536, ep 5000\r",
"iter 87537, ep 5000\r",
"iter 87538, ep 5000\r",
"iter 87539, ep 5000\r",
"iter 87540, ep 5000\r",
"iter 87541, ep 5000\r",
"iter 87542, ep 5000\r",
"iter 87543, ep 5000\r",
"iter 87544, ep 5000\r",
"iter 87545, ep 5000\r",
"iter 87546, ep 5000\r",
"iter 87547, ep 5000\r",
"iter 87548, ep 5000\r",
"iter 87549, ep 5000\r",
"iter 87550, ep 5000\r",
"iter 87551, ep 5000\r",
"iter 87552, ep 5000\r",
"iter 87553, ep 5000\r",
"iter 87554, ep 5000\r",
"iter 87555, ep 5000\r",
"iter 87556, ep 5000\r",
"iter 87557, ep 5000\r",
"iter 87558, ep 5000\r",
"iter 87559, ep 5000\r",
"iter 87560, ep 5000\r",
"iter 87561, ep 5000\r",
"iter 87562, ep 5000, ep reward 32.0, ep steps 32\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:34:07,873] Finished writing results. You can upload them to the scoreboard via gym.upload('/Users/winter/Google Drive/handson-ml/tmp')\n"
]
}
],
"source": [
"gamma = 0.99\n",
"max_episode = 5000\n",
"batch_size = 256\n",
"memory_warmup = batch_size*3\n",
"memory = Memory()\n",
"explore_eps = 1000\n",
"\n",
"env = gym.make('Copy-v0')\n",
"env = wrappers.Monitor(env,'./tmp/',force=True)\n",
"obs = env.reset()\n",
"init = tf.global_variables_initializer()\n",
"with tf.Session() as sess:\n",
" init.run()\n",
" iteration = 0\n",
" episode = 0\n",
" episode_reward = 0\n",
" episode_steps = 0\n",
" while episode <= max_episode:\n",
" print('\\riter {}, ep {}'.format(iteration,episode),end='')\n",
" p = episode/explore_eps\n",
" if np.random.rand() > p:\n",
" action = env.action_space.sample()\n",
" else:\n",
" Q_val = Q.eval(feed_dict={X:np.reshape(obs,[1,-1])})\n",
" action = np.argmax(Q_val,axis=1)\n",
" action = np.unravel_index(action[0],[2,2,5])\n",
" next_obs,reward,done,_ = env.step(action)\n",
" memory.append([obs,np.ravel_multi_index(action,[2,2,5]),reward,next_obs,done])\n",
" if iteration > memory_warmup:\n",
" memory_batch = memory.sample_batch(batch_size)\n",
" extract_mem = lambda k: np.array([item[k] for item in memory_batch])\n",
" obs_batch = extract_mem(0)\n",
" action_batch = extract_mem(1)\n",
" reward_batch = extract_mem(2)\n",
" next_obs_batch = extract_mem(3)\n",
" done_batch = extract_mem(4)\n",
" Qnext_val = Q.eval(feed_dict={X:np.expand_dims(next_obs_batch,axis=1)})\n",
" Qnext_val = np.max(Qnext_val,axis=1,keepdims=True)\n",
" Qexpected_batch = reward_batch + gamma*(1-done_batch)*Qnext_val.ravel()\n",
" Qexpected_batch = np.expand_dims(Qexpected_batch,1)\n",
" train_op.run(feed_dict={X:np.expand_dims(obs_batch,axis=1),action_ph:action_batch,Qexpected:Qexpected_batch})\n",
" iteration += 1\n",
" episode_reward += reward\n",
" episode_steps += 1\n",
" if done:\n",
" if iteration>memory_warmup and episode%100==0:\n",
" print(', ep reward {}, ep steps {}'.format(episode_reward, episode_steps))\n",
" episode_reward = 0\n",
" episode_steps = 0\n",
" episode += 1\n",
" obs = env.reset()\n",
" else:\n",
" obs = next_obs\n",
"env.close()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[2017-08-27 00:34:07,916] [Copy-v0] Uploading 5001 episodes of training data\n",
"[2017-08-27 00:34:10,513] [Copy-v0] Uploading videos of 15 training episodes (3807 bytes)\n",
"[2017-08-27 00:34:10,859] [Copy-v0] Creating evaluation object from ./tmp/ with learning curve and training video\n",
"[2017-08-27 00:34:11,425] \n",
"****************************************************\n",
"You successfully uploaded your evaluation on Copy-v0 to\n",
"OpenAI Gym! You can find it at:\n",
"\n",
" https://gym.openai.com/evaluations/eval_qdzwvK4RNa1Dx71JtH2g\n",
"\n",
"****************************************************\n"
]
}
],
"source": [
"gym.upload('./tmp/',api_key='sk_BlwjttPKR6ZsXVrObENYA')"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda env:tensorflow]",
"language": "python",
"name": "conda-env-tensorflow-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment