HTLife · September 24, 2019 15:42
diff --git a/CNNLSTM.ipynb b/CNNLSTM.ipynb
 {
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CNN LSTM\n",
    "\n",
    "DeepVO like neural network structure example\n",
    "\n",
    ">Wang, S., Clark, R., Wen, H., & Trigoni, N. (2017). DeepVO: Towards end-to-end visual odometry with deep Recurrent Convolutional Neural Networks. Proceedings - IEEE International Conference on Robotics and Automation, 2043–2050. https://doi.org/10.1109/ICRA.2017.7989236\n",
    "\n",
    "![./deepvo.png](https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45/raw/aff95313bc13176c69825f5871467469cf85a4d5/deepvo.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import related libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.models import Sequential\n",
    "from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed\n",
    "import numpy as np\n",
    "\n",
    "from keras.layers import Concatenate\n",
    "\n",
    "from keras.layers.convolutional import Conv2D"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define data dimension\n",
    "\n",
    "Assume we have gray scale image with dimension:\n",
    "  * (channel, img_height, img_width) = (1, 540, 960)\n",
    "  \n",
    "Using time-distributed CNN to process 3 images within 1 time frame.\n",
    "  * (batches, images, channel, img_height, img_width) = (1, 3, 1, 540, 960)\n",
    "  \n",
    "![cnnlstm.png](https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45/raw/aff95313bc13176c69825f5871467469cf85a4d5/cnnlstm.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-02-13T02:44:39.007942Z",
     "start_time": "2018-02-13T02:44:36.124672Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "time_distributed_70 (TimeDis (None, None, 1, 540, 40)  345640    \n",
      "_________________________________________________________________\n",
      "activation_23 (Activation)   (None, None, 1, 540, 40)  0         \n",
      "_________________________________________________________________\n",
      "time_distributed_71 (TimeDis (None, None, 1, 270, 20)  0         \n",
      "_________________________________________________________________\n",
      "dropout_22 (Dropout)         (None, None, 1, 270, 20)  0         \n",
      "_________________________________________________________________\n",
      "time_distributed_72 (TimeDis (None, None, 5400)        0         \n",
      "_________________________________________________________________\n",
      "lstm_23 (LSTM)               (None, None, 3)           64848     \n",
      "_________________________________________________________________\n",
      "lstm_24 (LSTM)               (None, 3)                 84        \n",
      "=================================================================\n",
      "Total params: 410,572\n",
      "Trainable params: 410,572\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "Epoch 1/2\n",
      "1/1 [==============================] - 1s 1s/step - loss: 1.0693\n",
      "Epoch 2/2\n",
      "1/1 [==============================] - 0s 9ms/step - loss: 0.7938\n",
      "1/1 [==============================] - 1s 621ms/step\n",
      "[[ 0.11569256  0.07892055  0.23299485]]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "sequence_lengths = None\n",
    "\n",
    "numberOfVideos = 10\n",
    "videoLength = 3\n",
    "numberOfPrediction = 3\n",
    "\n",
    "def defModel():\n",
    "\n",
    "    model = Sequential()\n",
    "    model.add(\n",
    "        TimeDistributed(\n",
    "            Conv2D(40, (3, 3), padding='same'),\n",
    "            input_shape=(sequence_lengths, 1, 540, 960)))\n",
    "    model.add(Activation('relu'))\n",
    "    model.add(\n",
    "        TimeDistributed(\n",
    "            MaxPooling2D(data_format=\"channels_first\", pool_size=(2, 2))))\n",
    "    model.add(Dropout(0.2))\n",
    "\n",
    "    model.add(TimeDistributed(Flatten()))\n",
    "    model.add(LSTM(3, return_sequences=True))\n",
    "    model.add(LSTM(3))    \n",
    "\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    model.summary()\n",
    "    return model\n",
    "\n",
    "\n",
    "def gen():\n",
    "    x_data = np.random.random((numberOfVideos, videoLength, 1, 540, 960))\n",
    "    y_data = np.ones((1, numberOfPrediction))  \n",
    "    for video in range(numberOfVideos):\n",
    "        x_train = x_data[video:video + 1]\n",
    "        y_train = y_data\n",
    "        yield (x_train, y_train)\n",
    "\n",
    "\n",
    "def main():\n",
    "    model = defModel()\n",
    "\n",
    "    x_train = []\n",
    "    seq_len = 15\n",
    "    for i in range(50):\n",
    "        x_train.append(x_data[i * 5:i * 5 + seq_len, :, :, :])\n",
    "    x_train = np.asarray(x_train, dtype='float32')\n",
    "\n",
    "    model.fit_generator(generator=gen(), steps_per_epoch=1, epochs=2)\n",
    "    \n",
    "    predicted = model.predict_generator(\n",
    "        gen(), \n",
    "        steps=1,\n",
    "        verbose=1)\n",
    "    print(predicted)\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
diff --git a/cnnlstm.png b/cnnlstm.png
diff --git a/deepvo.png b/deepvo.png
	{
	"cells": [
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# CNN LSTM\n",
	"\n",
	"DeepVO like neural network structure example\n",
	"\n",
	">Wang, S., Clark, R., Wen, H., & Trigoni, N. (2017). DeepVO: Towards end-to-end visual odometry with deep Recurrent Convolutional Neural Networks. Proceedings - IEEE International Conference on Robotics and Automation, 2043–2050. https://doi.org/10.1109/ICRA.2017.7989236\n",
	"\n",
	"![./deepvo.png](https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45/raw/aff95313bc13176c69825f5871467469cf85a4d5/deepvo.png)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Import related libraries"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from keras.models import Sequential\n",
	"from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed\n",
	"import numpy as np\n",
	"\n",
	"from keras.layers import Concatenate\n",
	"\n",
	"from keras.layers.convolutional import Conv2D"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Define data dimension\n",
	"\n",
	"Assume we have gray scale image with dimension:\n",
	" * (channel, img_height, img_width) = (1, 540, 960)\n",
	" \n",
	"Using time-distributed CNN to process 3 images within 1 time frame.\n",
	" * (batches, images, channel, img_height, img_width) = (1, 3, 1, 540, 960)\n",
	" \n",
	"![cnnlstm.png](https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45/raw/aff95313bc13176c69825f5871467469cf85a4d5/cnnlstm.png)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2018-02-13T02:44:39.007942Z",
	"start_time": "2018-02-13T02:44:36.124672Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"_________________________________________________________________\n",
	"Layer (type) Output Shape Param # \n",
	"=================================================================\n",
	"time_distributed_70 (TimeDis (None, None, 1, 540, 40) 345640 \n",
	"_________________________________________________________________\n",
	"activation_23 (Activation) (None, None, 1, 540, 40) 0 \n",
	"_________________________________________________________________\n",
	"time_distributed_71 (TimeDis (None, None, 1, 270, 20) 0 \n",
	"_________________________________________________________________\n",
	"dropout_22 (Dropout) (None, None, 1, 270, 20) 0 \n",
	"_________________________________________________________________\n",
	"time_distributed_72 (TimeDis (None, None, 5400) 0 \n",
	"_________________________________________________________________\n",
	"lstm_23 (LSTM) (None, None, 3) 64848 \n",
	"_________________________________________________________________\n",
	"lstm_24 (LSTM) (None, 3) 84 \n",
	"=================================================================\n",
	"Total params: 410,572\n",
	"Trainable params: 410,572\n",
	"Non-trainable params: 0\n",
	"_________________________________________________________________\n",
	"Epoch 1/2\n",
	"1/1 [==============================] - 1s 1s/step - loss: 1.0693\n",
	"Epoch 2/2\n",
	"1/1 [==============================] - 0s 9ms/step - loss: 0.7938\n",
	"1/1 [==============================] - 1s 621ms/step\n",
	"[[ 0.11569256 0.07892055 0.23299485]]\n"
	]
	}
	],
	"source": [
	"\n",
	"\n",
	"sequence_lengths = None\n",
	"\n",
	"numberOfVideos = 10\n",
	"videoLength = 3\n",
	"numberOfPrediction = 3\n",
	"\n",
	"def defModel():\n",
	"\n",
	" model = Sequential()\n",
	" model.add(\n",
	" TimeDistributed(\n",
	" Conv2D(40, (3, 3), padding='same'),\n",
	" input_shape=(sequence_lengths, 1, 540, 960)))\n",
	" model.add(Activation('relu'))\n",
	" model.add(\n",
	" TimeDistributed(\n",
	" MaxPooling2D(data_format=\"channels_first\", pool_size=(2, 2))))\n",
	" model.add(Dropout(0.2))\n",
	"\n",
	" model.add(TimeDistributed(Flatten()))\n",
	" model.add(LSTM(3, return_sequences=True))\n",
	" model.add(LSTM(3)) \n",
	"\n",
	" model.compile(loss='mse', optimizer='adam')\n",
	" model.summary()\n",
	" return model\n",
	"\n",
	"\n",
	"def gen():\n",
	" x_data = np.random.random((numberOfVideos, videoLength, 1, 540, 960))\n",
	" y_data = np.ones((1, numberOfPrediction)) \n",
	" for video in range(numberOfVideos):\n",
	" x_train = x_data[video:video + 1]\n",
	" y_train = y_data\n",
	" yield (x_train, y_train)\n",
	"\n",
	"\n",
	"def main():\n",
	" model = defModel()\n",
	"\n",
	" x_train = []\n",
	" seq_len = 15\n",
	" for i in range(50):\n",
	" x_train.append(x_data[i * 5:i * 5 + seq_len, :, :, :])\n",
	" x_train = np.asarray(x_train, dtype='float32')\n",
	"\n",
	" model.fit_generator(generator=gen(), steps_per_epoch=1, epochs=2)\n",
	" \n",
	" predicted = model.predict_generator(\n",
	" gen(), \n",
	" steps=1,\n",
	" verbose=1)\n",
	" print(predicted)\n",
	"\n",
	"\n",
	"if __name__ == \"__main__\":\n",
	" main()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	},
	"varInspector": {
	"cols": {
	"lenName": 16,
	"lenType": 16,
	"lenVar": 40
	},
	"kernels_config": {
	"python": {
	"delete_cmd_postfix": "",
	"delete_cmd_prefix": "del ",
	"library": "var_list.py",
	"varRefreshCmd": "print(var_dic_list())"
	},
	"r": {
	"delete_cmd_postfix": ") ",
	"delete_cmd_prefix": "rm(",
	"library": "var_list.r",
	"varRefreshCmd": "cat(var_dic_list()) "
	}
	},
	"types_to_exclude": [
	"module",
	"function",
	"builtin_function_or_method",
	"instance",
	"_Feature"
	],
	"window_display": false
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found