yatszhash · January 27, 2018 10:32
diff --git a/2nd_omalley_model.png b/2nd_omalley_model.png
diff --git a/README.md b/README.md
diff --git a/omalley_model.ipynb b/omalley_model.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Kaggle TensorFlow Speech Recognition Challenge: O'Malley's Model (2nd Place Solution)\n",
    "\n",
    "This model architecture was designed by Thomas O'Malley.\n",
    "Please read [his solution](https://www.kaggle.com/c/tensorflow-speech-recognition-challenge/discussion/47715) for the full description."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras.layers import Conv2D, MaxPool2D, Input, GlobalMaxPooling2D, Dropout, Dense "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from keras.models import Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from keras.utils import plot_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "LOG_MEL_FILTERBANK_DIM = 120"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "n_frames = 16000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_ = Input(shape=(n_frames, LOG_MEL_FILTERBANK_DIM, 1))\n",
    "x = Conv2D(64, kernel_size=(7, 3), padding=\"Same\", use_bias=False)(input_)\n",
    "x = MaxPool2D(pool_size=(1, 3))(x)\n",
    "x = Conv2D(128, kernel_size=(1, 7), padding=\"same\", use_bias=False)(x)\n",
    "x = MaxPool2D(pool_size=(1, 4))(x)\n",
    "x = Conv2D(256, kernel_size=(1, 10), padding=\"valid\", use_bias=False)(x)\n",
    "x = Conv2D(512, kernel_size=(7, 1), padding=\"same\", use_bias=False)(x)\n",
    "x = GlobalMaxPooling2D()(x)\n",
    "x = Dropout(0.3)(x)\n",
    "x = Dense(256)(x)\n",
    "output_ = Dense(12, activation=\"softmax\")(x)\n",
    "model = Model(inputs=input_, outputs=output_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "input_1 (InputLayer)         (None, 16000, 120, 1)     0         \n",
      "_________________________________________________________________\n",
      "conv2d_1 (Conv2D)            (None, 16000, 120, 64)    1344      \n",
      "_________________________________________________________________\n",
      "max_pooling2d_1 (MaxPooling2 (None, 16000, 40, 64)     0         \n",
      "_________________________________________________________________\n",
      "conv2d_2 (Conv2D)            (None, 16000, 40, 128)    57344     \n",
      "_________________________________________________________________\n",
      "max_pooling2d_2 (MaxPooling2 (None, 16000, 10, 128)    0         \n",
      "_________________________________________________________________\n",
      "conv2d_3 (Conv2D)            (None, 16000, 1, 256)     327680    \n",
      "_________________________________________________________________\n",
      "conv2d_4 (Conv2D)            (None, 16000, 1, 512)     917504    \n",
      "_________________________________________________________________\n",
      "global_max_pooling2d_1 (Glob (None, 512)               0         \n",
      "_________________________________________________________________\n",
      "dropout_1 (Dropout)          (None, 512)               0         \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 256)               131328    \n",
      "_________________________________________________________________\n",
      "dense_2 (Dense)              (None, 12)                3084      \n",
      "=================================================================\n",
      "Total params: 1,438,284\n",
      "Trainable params: 1,438,284\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plot_model(model, \"2nd_omalley_model.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "aind-vui",
   "language": "python",
   "name": "aind-vui"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Kaggle TensorFlow Speech Recognition Challenge: O'Malley's Model (2nd Place Solution)\n",
	"\n",
	"This model architecture was designed by Thomas O'Malley.\n",
	"Please read [his solution](https://www.kaggle.com/c/tensorflow-speech-recognition-challenge/discussion/47715) for the full description."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from pathlib import Path"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Using TensorFlow backend.\n"
	]
	}
	],
	"source": [
	"from keras.layers import Conv2D, MaxPool2D, Input, GlobalMaxPooling2D, Dropout, Dense "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from keras.models import Model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from keras.utils import plot_model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"LOG_MEL_FILTERBANK_DIM = 120"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"n_frames = 16000"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [],
	"source": [
	"input_ = Input(shape=(n_frames, LOG_MEL_FILTERBANK_DIM, 1))\n",
	"x = Conv2D(64, kernel_size=(7, 3), padding=\"Same\", use_bias=False)(input_)\n",
	"x = MaxPool2D(pool_size=(1, 3))(x)\n",
	"x = Conv2D(128, kernel_size=(1, 7), padding=\"same\", use_bias=False)(x)\n",
	"x = MaxPool2D(pool_size=(1, 4))(x)\n",
	"x = Conv2D(256, kernel_size=(1, 10), padding=\"valid\", use_bias=False)(x)\n",
	"x = Conv2D(512, kernel_size=(7, 1), padding=\"same\", use_bias=False)(x)\n",
	"x = GlobalMaxPooling2D()(x)\n",
	"x = Dropout(0.3)(x)\n",
	"x = Dense(256)(x)\n",
	"output_ = Dense(12, activation=\"softmax\")(x)\n",
	"model = Model(inputs=input_, outputs=output_)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"_________________________________________________________________\n",
	"Layer (type) Output Shape Param # \n",
	"=================================================================\n",
	"input_1 (InputLayer) (None, 16000, 120, 1) 0 \n",
	"_________________________________________________________________\n",
	"conv2d_1 (Conv2D) (None, 16000, 120, 64) 1344 \n",
	"_________________________________________________________________\n",
	"max_pooling2d_1 (MaxPooling2 (None, 16000, 40, 64) 0 \n",
	"_________________________________________________________________\n",
	"conv2d_2 (Conv2D) (None, 16000, 40, 128) 57344 \n",
	"_________________________________________________________________\n",
	"max_pooling2d_2 (MaxPooling2 (None, 16000, 10, 128) 0 \n",
	"_________________________________________________________________\n",
	"conv2d_3 (Conv2D) (None, 16000, 1, 256) 327680 \n",
	"_________________________________________________________________\n",
	"conv2d_4 (Conv2D) (None, 16000, 1, 512) 917504 \n",
	"_________________________________________________________________\n",
	"global_max_pooling2d_1 (Glob (None, 512) 0 \n",
	"_________________________________________________________________\n",
	"dropout_1 (Dropout) (None, 512) 0 \n",
	"_________________________________________________________________\n",
	"dense_1 (Dense) (None, 256) 131328 \n",
	"_________________________________________________________________\n",
	"dense_2 (Dense) (None, 12) 3084 \n",
	"=================================================================\n",
	"Total params: 1,438,284\n",
	"Trainable params: 1,438,284\n",
	"Non-trainable params: 0\n",
	"_________________________________________________________________\n"
	]
	}
	],
	"source": [
	"model.summary()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"plot_model(model, \"2nd_omalley_model.png\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "aind-vui",
	"language": "python",
	"name": "aind-vui"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}