Skip to content

Instantly share code, notes, and snippets.

@HTLife
Last active September 24, 2019 15:42
Show Gist options
  • Select an option

  • Save HTLife/25c0cd362faa91477b8f28f6033adb45 to your computer and use it in GitHub Desktop.

Select an option

Save HTLife/25c0cd362faa91477b8f28f6033adb45 to your computer and use it in GitHub Desktop.
DeepVO like neural network structure example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# CNN LSTM\n",
"\n",
"DeepVO like neural network structure example\n",
"\n",
">Wang, S., Clark, R., Wen, H., & Trigoni, N. (2017). DeepVO: Towards end-to-end visual odometry with deep Recurrent Convolutional Neural Networks. Proceedings - IEEE International Conference on Robotics and Automation, 2043–2050. https://doi.org/10.1109/ICRA.2017.7989236\n",
"\n",
"![./deepvo.png](https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45/raw/aff95313bc13176c69825f5871467469cf85a4d5/deepvo.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Import related libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# All imports in one cell, only names used below.\n",
"# NOTE: `Merge` was removed in Keras 2 (use functional-API `Concatenate`\n",
"# if merging is ever needed); importing it raises ImportError, so it and\n",
"# the unused `Concatenate` import are dropped.\n",
"import numpy as np\n",
"\n",
"from keras.models import Sequential\n",
"from keras.layers import Activation, MaxPooling2D, Dropout, LSTM, Flatten, TimeDistributed\n",
"from keras.layers.convolutional import Conv2D"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define data dimension\n",
"\n",
"Assume we have gray scale image with dimension:\n",
" * (channel, img_height, img_width) = (1, 540, 960)\n",
" \n",
"Using time-distributed CNN to process 3 images within 1 time frame.\n",
" * (batches, images, channel, img_height, img_width) = (1, 3, 1, 540, 960)\n",
" \n",
"![cnnlstm.png](https://gist.github.com/HTLife/25c0cd362faa91477b8f28f6033adb45/raw/aff95313bc13176c69825f5871467469cf85a4d5/cnnlstm.png)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"ExecuteTime": {
"end_time": "2018-02-13T02:44:39.007942Z",
"start_time": "2018-02-13T02:44:36.124672Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"time_distributed_70 (TimeDis (None, None, 1, 540, 40) 345640 \n",
"_________________________________________________________________\n",
"activation_23 (Activation) (None, None, 1, 540, 40) 0 \n",
"_________________________________________________________________\n",
"time_distributed_71 (TimeDis (None, None, 1, 270, 20) 0 \n",
"_________________________________________________________________\n",
"dropout_22 (Dropout) (None, None, 1, 270, 20) 0 \n",
"_________________________________________________________________\n",
"time_distributed_72 (TimeDis (None, None, 5400) 0 \n",
"_________________________________________________________________\n",
"lstm_23 (LSTM) (None, None, 3) 64848 \n",
"_________________________________________________________________\n",
"lstm_24 (LSTM) (None, 3) 84 \n",
"=================================================================\n",
"Total params: 410,572\n",
"Trainable params: 410,572\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"Epoch 1/2\n",
"1/1 [==============================] - 1s 1s/step - loss: 1.0693\n",
"Epoch 2/2\n",
"1/1 [==============================] - 0s 9ms/step - loss: 0.7938\n",
"1/1 [==============================] - 1s 621ms/step\n",
"[[ 0.11569256 0.07892055 0.23299485]]\n"
]
}
],
"source": [
"\n",
"\n",
"sequence_lengths = None\n",
"\n",
"numberOfVideos = 10\n",
"videoLength = 3\n",
"numberOfPrediction = 3\n",
"\n",
"def defModel():\n",
"    \"\"\"Build and compile the TimeDistributed-CNN + stacked-LSTM model.\n",
"\n",
"    Input shape: (batch, time, channel, height, width)\n",
"                 = (None, sequence_lengths, 1, 540, 960).\n",
"    Returns the compiled keras Sequential model (also prints summary).\n",
"    \"\"\"\n",
"    model = Sequential()\n",
"    # data_format='channels_first' so the leading 1 is the channel axis.\n",
"    # Without it Conv2D defaults to channels_last and would treat the\n",
"    # 960-wide axis as channels — inconsistent with the pooling layer\n",
"    # below, which already declares channels_first.\n",
"    model.add(\n",
"        TimeDistributed(\n",
"            Conv2D(40, (3, 3), padding='same', data_format='channels_first'),\n",
"            input_shape=(sequence_lengths, 1, 540, 960)))\n",
"    model.add(Activation('relu'))\n",
"    model.add(\n",
"        TimeDistributed(\n",
"            MaxPooling2D(data_format='channels_first', pool_size=(2, 2))))\n",
"    model.add(Dropout(0.2))\n",
"\n",
"    # Flatten each frame's feature map to a vector before the LSTMs.\n",
"    model.add(TimeDistributed(Flatten()))\n",
"    # Output width matches the target vector produced by gen().\n",
"    model.add(LSTM(numberOfPrediction, return_sequences=True))\n",
"    model.add(LSTM(numberOfPrediction))\n",
"\n",
"    model.compile(loss='mse', optimizer='adam')\n",
"    model.summary()\n",
"    return model\n",
"\n",
"\n",
"def gen():\n",
"    \"\"\"Yield one (x, y) training pair per synthetic video.\n",
"\n",
"    x: one video, shape (1, videoLength, 1, 540, 960), random values.\n",
"    y: constant target of ones, shape (1, numberOfPrediction).\n",
"    \"\"\"\n",
"    videos = np.random.random((numberOfVideos, videoLength, 1, 540, 960))\n",
"    targets = np.ones((1, numberOfPrediction))\n",
"    for idx in range(numberOfVideos):\n",
"        yield (videos[idx:idx + 1], targets)\n",
"\n",
"\n",
"def main():\n",
"    \"\"\"Build the model, fit it on generated data, and print a prediction.\"\"\"\n",
"    model = defModel()\n",
"\n",
"    # NOTE(review): the original built an unused `x_train` list here from\n",
"    # `x_data`, a name never defined in this scope (leftover hidden kernel\n",
"    # state) — it raised NameError on a fresh kernel. Removed so the\n",
"    # notebook survives Restart Kernel -> Run All.\n",
"    model.fit_generator(generator=gen(), steps_per_epoch=1, epochs=2)\n",
"\n",
"    predicted = model.predict_generator(\n",
"        gen(),\n",
"        steps=1,\n",
"        verbose=1)\n",
"    print(predicted)\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    main()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment