Last active
September 24, 2019 15:42
-
-
Save HTLife/25c0cd362faa91477b8f28f6033adb45 to your computer and use it in GitHub Desktop.
DeepVO like neural network structure example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "attachments": {}, | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# CNN LSTM\n", | |
| "\n", | |
| "An example of a DeepVO-like neural network structure.\n", | |
| "\n", | |
| ">Wang, S., Clark, R., Wen, H., & Trigoni, N. (2017). DeepVO: Towards end-to-end visual odometry with deep Recurrent Convolutional Neural Networks. Proceedings - IEEE International Conference on Robotics and Automation, 2043–2050. https://doi.org/10.1109/ICRA.2017.7989236\n", | |
| "\n", | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Import related libraries" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from keras.models import Sequential\n", | |
| "from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, Merge, TimeDistributed\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "from keras.layers import Concatenate\n", | |
| "\n", | |
| "from keras.layers.convolutional import Conv2D" | |
| ] | |
| }, | |
| { | |
| "attachments": {}, | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Define data dimension\n", | |
| "\n", | |
| "Assume each input is a grayscale image with dimensions:\n", | |
| " * (channel, img_height, img_width) = (1, 540, 960)\n", | |
| " \n", | |
| "A time-distributed CNN processes a sequence of 3 images per sample, so one batch has the shape:\n", | |
| " * (batches, images, channel, img_height, img_width) = (1, 3, 1, 540, 960)\n", | |
| " \n", | |
| "" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 23, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2018-02-13T02:44:39.007942Z", | |
| "start_time": "2018-02-13T02:44:36.124672Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "_________________________________________________________________\n", | |
| "Layer (type) Output Shape Param # \n", | |
| "=================================================================\n", | |
| "time_distributed_70 (TimeDis (None, None, 1, 540, 40) 345640 \n", | |
| "_________________________________________________________________\n", | |
| "activation_23 (Activation) (None, None, 1, 540, 40) 0 \n", | |
| "_________________________________________________________________\n", | |
| "time_distributed_71 (TimeDis (None, None, 1, 270, 20) 0 \n", | |
| "_________________________________________________________________\n", | |
| "dropout_22 (Dropout) (None, None, 1, 270, 20) 0 \n", | |
| "_________________________________________________________________\n", | |
| "time_distributed_72 (TimeDis (None, None, 5400) 0 \n", | |
| "_________________________________________________________________\n", | |
| "lstm_23 (LSTM) (None, None, 3) 64848 \n", | |
| "_________________________________________________________________\n", | |
| "lstm_24 (LSTM) (None, 3) 84 \n", | |
| "=================================================================\n", | |
| "Total params: 410,572\n", | |
| "Trainable params: 410,572\n", | |
| "Non-trainable params: 0\n", | |
| "_________________________________________________________________\n", | |
| "Epoch 1/2\n", | |
| "1/1 [==============================] - 1s 1s/step - loss: 1.0693\n", | |
| "Epoch 2/2\n", | |
| "1/1 [==============================] - 0s 9ms/step - loss: 0.7938\n", | |
| "1/1 [==============================] - 1s 621ms/step\n", | |
| "[[ 0.11569256 0.07892055 0.23299485]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "\n", | |
| "\n", | |
| "# Size of the time axis in the model's input_shape; None leaves it unspecified.\n", | |
| "sequence_lengths = None\n", | |
| "\n", | |
| "# Number of random clips gen() creates (first axis of its input array).\n", | |
| "numberOfVideos = 10\n", | |
| "# Number of frames in each clip (second axis of gen()'s input array).\n", | |
| "videoLength = 3\n", | |
| "# Length of the all-ones target vector gen() yields with every clip.\n", | |
| "numberOfPrediction = 3\n", | |
| "\n", | |
| "def defModel():\n", | |
| "\n", | |
| " model = Sequential()\n", | |
| " model.add(\n", | |
| " TimeDistributed(\n", | |
| " Conv2D(40, (3, 3), padding='same'),\n", | |
| " input_shape=(sequence_lengths, 1, 540, 960)))\n", | |
| " model.add(Activation('relu'))\n", | |
| " model.add(\n", | |
| " TimeDistributed(\n", | |
| " MaxPooling2D(data_format=\"channels_first\", pool_size=(2, 2))))\n", | |
| " model.add(Dropout(0.2))\n", | |
| "\n", | |
| " model.add(TimeDistributed(Flatten()))\n", | |
| " model.add(LSTM(3, return_sequences=True))\n", | |
| " model.add(LSTM(3)) \n", | |
| "\n", | |
| " model.compile(loss='mse', optimizer='adam')\n", | |
| " model.summary()\n", | |
| " return model\n", | |
| "\n", | |
| "\n", | |
| "def gen():\n", | |
| " x_data = np.random.random((numberOfVideos, videoLength, 1, 540, 960))\n", | |
| " y_data = np.ones((1, numberOfPrediction)) \n", | |
| " for video in range(numberOfVideos):\n", | |
| " x_train = x_data[video:video + 1]\n", | |
| " y_train = y_data\n", | |
| " yield (x_train, y_train)\n", | |
| "\n", | |
| "\n", | |
| "def main():\n", | |
| " model = defModel()\n", | |
| "\n", | |
| " x_train = []\n", | |
| " seq_len = 15\n", | |
| " for i in range(50):\n", | |
| " x_train.append(x_data[i * 5:i * 5 + seq_len, :, :, :])\n", | |
| " x_train = np.asarray(x_train, dtype='float32')\n", | |
| "\n", | |
| " model.fit_generator(generator=gen(), steps_per_epoch=1, epochs=2)\n", | |
| " \n", | |
| " predicted = model.predict_generator(\n", | |
| " gen(), \n", | |
| " steps=1,\n", | |
| " verbose=1)\n", | |
| " print(predicted)\n", | |
| "\n", | |
| "\n", | |
| "# Run the demo only when this code is executed as the main module.\n", | |
| "if __name__ == \"__main__\":\n", | |
| " main()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| }, | |
| "varInspector": { | |
| "cols": { | |
| "lenName": 16, | |
| "lenType": 16, | |
| "lenVar": 40 | |
| }, | |
| "kernels_config": { | |
| "python": { | |
| "delete_cmd_postfix": "", | |
| "delete_cmd_prefix": "del ", | |
| "library": "var_list.py", | |
| "varRefreshCmd": "print(var_dic_list())" | |
| }, | |
| "r": { | |
| "delete_cmd_postfix": ") ", | |
| "delete_cmd_prefix": "rm(", | |
| "library": "var_list.r", | |
| "varRefreshCmd": "cat(var_dic_list()) " | |
| } | |
| }, | |
| "types_to_exclude": [ | |
| "module", | |
| "function", | |
| "builtin_function_or_method", | |
| "instance", | |
| "_Feature" | |
| ], | |
| "window_display": false | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment

