Skip to content

Instantly share code, notes, and snippets.

@hans
Created April 4, 2019 18:19
Show Gist options
  • Save hans/2a147d00f4376cf50e1eb06828d5dbde to your computer and use it in GitHub Desktop.
Save hans/2a147d00f4376cf50e1eb06828d5dbde to your computer and use it in GitHub Desktop.
CHILDES analysis: extract mother–child question–answer pairs from Brown corpus
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"sns.set_style(\"whitegrid\")\n",
"\n",
"import pylangacq as pla"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Glob pattern intended to pick up the .cha transcripts stored under Brown/.\n",
"# NOTE(review): the pattern is not restricted to a single child, and a later cell's\n",
"# output contains 'Adam hat', so `eve` appears to hold more than Eve's transcripts\n",
"# despite its name -- confirm before treating results as Eve-only.\n",
"BROWN_GLOB = \"Brown/**/*.cha\"\n",
"eve = pla.read_chat(BROWN_GLOB)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5,1,'Age histogram')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEFCAYAAAAL/efAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHf9JREFUeJzt3XtU1HX+x/HncFcueQuz8AIqibod\n3dTWlp+apbCWhR3vha2YpdImlq6IGF5INNNSSk3KtTRvsW7ZVqZiraZCnTJdDfOSl1BTTE1AmeHy\n/f3hz/llJY04M4Df1+Mcz5mZ73y/nzcfhtd8/HxvFsMwDERExFQ8qroAERFxP4W/iIgJKfxFRExI\n4S8iYkIKfxERE1L4i4iYkMJfaoSSkhIiIyN5/PHHnbrd9PR0pk6d+pvLhg8fzoEDBypcPy4ujjNn\nzji1JhF38KrqAkQcsWHDBlq1asXu3bs5ePAgzZs3d3mbGRkZv/uerVu3urwOEVew6CQvqQliY2Pp\n1asX+/fvp7S01D5aX7RoEZmZmfj7+9OhQweysrLYtGkTNpuNF198kS+++IKysjJat25NcnIyAQEB\nV2w3PT2dL7/8krKyMvLz82nQoAFz5swhODiY7t27M3fuXMLCwpgwYQJHjhzBw8ODNm3aMHXqVCZO\nnMiaNWsIDw9n0aJFFBYWMnXqVM6dO4fFYiEuLo6YmJgK60xMTOTcuXN8//33dOvWjb59+zJ16lSK\niorIz8+nVatWvPzyy/j6+vKHP/yBoUOHsm3bNi5cuMBTTz3FunXr2LdvH8HBwSxcuJDatWu7/Xcj\nNZOmfaTaO3DgADt27CA6OpqYmBjee+89zp49y5YtW1izZg2ZmZmsWbOGoqIi+zqLFi3C09OTNWvW\nsHbtWoKDg3nxxRd/c/vff/89c+fOZd26dQQFBfHOO+9csXzDhg0UFRXx3nvvkZmZaV8nLS0NgDff\nfJObb76ZkSNHEhsby/vvv09GRgZz5sxhx44dFdYJUFxczAcffMC4ceNYvXo1MTExrF69mvXr15OX\nl8enn34KgM1mo0GDBmRmZhITE0NycjITJ07kww8/pLCwkKysLGd1uZiApn2k2luxYgX33HMPdevW\npW7duoSEhLB69Wry8/OJjo4mKCgIgEceeYTs7GwAPv30UwoKCti2bRtwaZ9B/fr1f3P7f/7zn6lX\nrx4ArVq1+tUc/p133slLL71EbGwsd999N4899hhNmza94j2HDx/GarXSs2dPABo2bEjPnj3ZsmUL\n58+fv2qdl7d/2bhx49i6dSsZGRkcPnyYU6dOceHCBfvyqKgoAJo0aUJ4eDgNGzYEICQkhJ9++ula\nulVMTuEv1dqFCxd477338PHxoXv37gAUFhaybNky7r//fn4+a+np6Wl/XF5eTlJSEl27dgWgqKgI\nq9X6m214ef3/n4HFYuGXM6GNGzdmw4YN5OTkkJ2dzdChQ5k6daq9HoCysjIsFssV6xmGQWlpKV5e\nXletE7hiquaZZ56hrKyMv/zlL3Tr1o0TJ05csa63t/dvPha5Vpr2kWrt/fffp06dOmzZsoVNmzax\nadMmNm7cyIULF2jTpg3r16+noKAAwD4lAxAZGcnbb7+NzWajvLycSZMmMWfOnErVsHz5ciZMmEBk\nZCTjxo0jMjKSb775BrgU5KWlpYSFheHl5cX69esBOHnyJB9//DF33303Xbt2vWqdv/TZZ58RHx9P\nr169ANi5cydlZWWVqlukIhr5S7W2YsUKhg4desVoOSgoiNjYWJYsWUL//v0ZMGAAfn5+tGzZklq1\nagEwatQoZs6cSZ8+fSgrKyMiIoLExMRK1RATE8Pnn39Or169qFWrFo0aNSI2NhaA6OhoYmNjSU9P\nZ/78+aSmppKenk5ZWRnx8fH86U9/Arhqnb80
ZswY4uPjqV27NgEBAXTs2JGjR49Wqm6RiuhoH6mx\n/vvf/7Jjxw6GDBkCwD/+8Q927tzJyy+/XMWVXamm1CnmovCXGquwsJCkpCS+++47LBYLjRo1Ytq0\nafadoNVFTalTzEXhLyJiQtrhKyJiQgp/ERETqrZH+3z99df4+vq6tU2r1er2Nqs79cmV1B+/pj65\nUlX3h9VqpV27dr/7vmob/r6+vkRERLi1zdzcXLe3Wd2pT66k/vg19cmVqro/cnNzHXqfpn1ERExI\n4S8iYkIKfxERE6q2c/4iUj2VlJSQl5dHcXGx/bmj88xm4K7+8PPzIyQkpNIX+FP4i8g1ycvLIzAw\nkGbNmmGxWLh48eJVr1VkRu7oD8Mw+PHHH8nLyyM0NLRS29C0j4hck+LiYurXr/+rS1iL+1gsFurX\nr2//31dlKPxF5Jop+Kve9f4OFP4icl0sns69qUxxie5f4A6a8xeR6+Ln40WzxA+ctr3DM+532rYc\nMWbMGGbOnImPj49b2z1+/Dh79+6le/fuxMbGMnnyZJo3b+629jXyv0G4arTkyJmKGqlJTfbSSy+5\nPfgBsrOz+eqrr9ze7mUa+d8g/Lw9nTr6uhbuHqmJuRUWFjJx4kQKCgo4e/Ys/fr1Y/DgwezatYsp\nU6bg7+9P/fr18fX1ZcaMGSxdupR///vfWCwWevXqZb+pzmXdu3fno48+IiUlBR8fH44dO8apU6eY\nMWMGbdq0sb8vJyeHRYsW4e3tzQ8//MDAgQPJzs5m7969DBkyhMGDB7N161bmzJlDrVq1qFOnDtOn\nTyc3N5eMjAy8vb3Jy8ujV69ePPHEEyxatIji4mLat28PwKuvvsrp06e5ePEic+bMwd/fn4SEBAzD\noKSkhClTpnD77bc7rR818heRGuXIkSPcf//9LF68mIULF7JkyRIAUlJSmDFjBm+99RZNmjQB4MCB\nA3z44YcsX76c5cuXs3HjRr777rurbvvWW2/ljTfeIDY2llWrVv1q+Q8//EB6ejqTJ09mwYIFvPDC\nC2RkZLBq1SoMw2DSpEnMnj2bZcuW0bFjRxYsWABcmuJJT09n1apVvP7663h6evLEE0/wwAMPcO+9\n9wLQtWtX3nrrLbp06cK6devYtWsXgYGBZGRkkJycTGFhoVP7UeEvIjVKgwYN2LhxI2PHjmXBggWU\nlpYCcOrUKVq2bAnAnXfeCcC+ffs4fvw4f/3rX3nsscc4d+5chfdEvjzNecstt2Cz2X61vGXLlnh7\nexMYGEiTJk3w8fHhpptuwmq1cvbsWQICAux3aOvYsSP79+8HIDw8HC8vL2rXro2fn99vtt22bVv7\nz1dcXEyXLl3o2LEjo0aNYt68eXh4ODeuXTLtU1ZWRnJyMocOHcLT05O0tDQMwyAxMRGLxULLli1J\nSUlx+g8jIje+xYsX065dOwYPHkx2djb/+c9/gEuBfeDAAVq0aMHOnTsBCAsLo0WLFrz++utYLBaW\nLFlCeHj4Vbf9e4dPVrS8bt26FBYWkp+fT5MmTfj8889p1qzZVdfz8PCgvLz8qtvLyckhODiYxYsX\ns2PHDubMmcPSpUsrrO9auCT8P/nkEwBWrlxJTk6OPfwTEhK46667eO6558jKyqJHjx6uaF5E3KjY\nVurU/T7FJWX4eXtedfk999zD5MmTef/996lTpw6enp7YbDZSUlJISkqidu3aeHt707BhQ1q1akXn\nzp0ZNGgQNpuNO+64w2X3TrZYLKSmpvLss8/i6enJTTfdRFpamn30/0vh4eEsWLDgiv0KP9eqVSvG\njBnDm2++iYeHB/Hx8c6t11X38C0tLcXLy4t//etffPXVV3z66ads3rwZi8XCxo0b2bp1KykpKVdd\nvypu5lJcXHzV/5JVdxEREVW6w9cs13apyZ8RZykpKbFPr8ClSw1Uh5O+Vq5cSc+ePalXrx6vvPIK\n3t7ePPnk
k26vw539sX///t+8to8jR+m57GgfLy8vxo8fz4YNG5g3bx6ffPKJvUP8/f0pKCiocH3d\nzKVmMUu/6TNyqQ9+fu2a6nJtn0aNGhEfH0/t2rUJDAxkxowZVVKXO/vD29v7V59HRwdiLj3Uc+bM\nmYwdO5b+/ftjtVrtrxcVFREUFOTKpkXEZKKjo4mOjq7qMmoMl+xxfffdd3nttdcAqFWrFhaLhbZt\n25KTkwPA5s2b6dChgyuaFhE3cNFssVyD6/0duGTk37NnTyZMmMAjjzxCaWkpSUlJNG/enEmTJjFn\nzhzCwsKIiopyRdMi4mJ+fn78+OOPurJnFbp8Sefr2f/kkvCvXbs2c+fO/dXry5Ytc0VzIuJGISEh\n5OXlkZ+fD1zaAVzZG4rciNzVH5dv5lJZuryDiFwTb2/vK24gop3gV6op/aGzrKTGqooLyl3+o9bF\n7KSm08hfaixdzE6k8jTyFxExIYW/iIgJKfxFRExI4S8iYkIKfxERE1L4i4iYkMJfRMSEFP4iIiak\n8BcRMSGFv4iICSn8RURMSOEvImJCCn8RERNS+IuImJDCX0TEhBT+IiImpPAXETEhhb+IiAkp/OW6\n6X625nC137Orb1auz5dr6B6+ct2q6l66uo+ue+n3fGPRyF9ExIQU/iIiJuT0aZ+SkhKSkpI4duwY\nNpuNkSNHcssttzBixAiaNWsGwKBBg+jVq5ezmxYREQc5PfzXrl1LnTp1mDVrFmfPnqVPnz7Ex8cz\ndOhQ4uLinN2ciIhUgtPDPzo6mqioKPtzT09Pdu/ezaFDh8jKyqJp06YkJSUREBDg7KZFRMRBTg9/\nf39/AAoLC3n66adJSEjAZrPRr18/2rZty4IFC3j11VcZP358hduxWq3k5uY6u7wKFRcXu71NZ3H1\n4XZypeKSMvy8Pd3ebtFFK0cPf+f2dqFqP2M16e+ypuSISw71PHHiBPHx8QwePJjevXtz/vx5goKC\nAOjRowfTpk373W34+vq6/cOWm5urEBWHVOVhj2b8jNakn7mqc8TRLx6nH+1z+vRp4uLiGDduHH37\n9gVg2LBh7Nq1C4Dt27fTpk0bZzcrIiLXwOkj/4ULF3L+/Hnmz5/P/PnzAUhMTGT69Ol4e3vToEED\nh0b+IiLiOk4P/+TkZJKTk3/1+sqVK53dlIiIVJJO8hIRMSGFv4iICSn8RURMSOEvImJCCn8RERNS\n+IuImJDCX0TEhBT+IiImpPAXETEhhb+IiAkp/EVETEjhLyJiQgp/ERETUviLiJiQwl9ExIQU/iI1\nSHFJWVWXIDcIl9zDV0Rco6ruHQyX7h8sNw6N/EVETEjhLyJiQgp/ERETUviLiJiQwl9ExIQU/iJS\nrVXV4a03+mG1OtRTRKq1qjq89UY/tFUjfxERE1L4i4iYkNOnfUpKSkhKSuLYsWPYbDZGjhxJixYt\nSExMxGKx0LJlS1JSUvDw0PeOiEhVcXr4r127ljp16jBr1izOnj1Lnz59aNWqFQkJCdx1110899xz\nZGVl0aNHD2c3LSIiDnJ6+EdHRxMVFWV/7unpyZ49e+jUqRMAXbp0YevWrb8b/larldzcXGeXV6Hi\n4mK3t+ksERERVV2CyA2nMnlQU3LE6eHv7+8PQGFhIU8//TQJCQnMnDkTi8ViX15QUPC72/H19XV7\noOXm5ipERcSuMnlQ1Tni6BePSybeT5w4wZAhQ3jooYfo3bv3FfP7RUVFBAUFuaJZERFxkNPD//Tp\n08TFxTFu3Dj69u0LQOvWrcnJyQFg8+bNdOjQwdnNiojINXAo/E+fPu3wBhcuXMj58+eZP38+sbGx\nxMbGkpCQQHp6OgMGDKCkpOSKfQIiIuJ+Ds35/+1vf6NevXr07duXrl27VniYZnJyMsnJyb96fdmy\nZZWvUkREnMqh8F+xYgUHDx4kMzOTBQsW0LlzZ/r27Uvjxo1dXZ+IiLiAw3
P+wcHBNG7cGD8/P/bt\n28fzzz/P3LlzXVmbiIi4iEMj/9GjR7N//34efPBBZs2aRcOGDQF4+OGHGT16tEsLFBER53Mo/Pv3\n70+7du3w9/fn1KlT9tdXrFjhssJERMR1HJr22bFjB+np6QCkpqayaNEi4NKJWCIiUvM4FP6bNm0i\nMTERgHnz5rFp0yaXFiUiIq7lUPhbLBZsNhtw6aqdhmG4tCgREXEth+b8Bw4cSO/evQkPD+e7777j\n8ccfd3VdIiLiQg6Ff79+/bj33nv5/vvvady4MfXq1XN1XSIi4kIOhX9ubi6rVq3CarXaX0tLS3NZ\nUSIi4loOhX9iYiKPPvoot9xyi6vrERERN3Ao/Bs0aEC/fv1cXYuIiLiJQ+F/2223sWjRIiIiIuw3\nZYmMjHRpYSIi4joOhX9JSQmHDh3i0KFD9tcU/iIiNZdD4Z+WlsahQ4c4evQot99+O8HBwa6uS0RE\nXMih8F+2bBkbNmzgp59+ok+fPhw5coTnnnvO1bWJiIiLOHSG7wcffMCSJUsIDAzkscceY+fOna6u\nS0REXMih8L98OYfLO3t9fHxcV5GIiLicQ9M+DzzwAI888gjHjx9n+PDh3Hfffa6uS0REXMih8H/0\n0Ufp3Lkz+/btIzQ0lFatWrm6LhERcSGHwv+VV16xPz548CAbN27kqaeecllRIiLiWg6f4QuX5v6/\n+eYbysvLXVqUiEhVKy4pw8/b85rXi4iIqLK2r4XDl3T+OV3SWURudH7enjRL/KBK2j48436Xt+FQ\n+P/8zN78/HxOnDjhsoJERMT1HAr/n5/Q5evry9///neXFSQiIq7nUPgvXbr0mje8c+dOXnzxRZYu\nXcqePXsYMWIEzZo1A2DQoEH06tXrmrcpIiLO4VD4P/jggxQVFeHr62u/oYthGFgsFrKysn71/oyM\nDNauXUutWrUA+Oabbxg6dChxcXFOLF1ERCrLofBv3749MTExtG/fnm+//ZY33niD1NTUq76/SZMm\npKen26eHdu/ezaFDh8jKyqJp06YkJSUREBDgnJ9ARESumUPhf/DgQdq3bw/A7bffzokTJyq8xENU\nVBR5eXn253fccQf9+vWjbdu2LFiwgFdffZXx48dX2KbVaiU3N9eR8pymuLjY7W06izMOLxOR6sPV\nWeRQ+AcGBvLyyy9zxx138OWXX3LrrbdeUyM9evQgKCjI/njatGm/u46vr6/bAy03N1chKiLVQmWz\nyNEvDYcu7DZ79mwCAgLYsmULjRs35vnnn7+mYoYNG8auXbsA2L59O23atLmm9UVExLkcGvn7+vpy\n0003ceHCBUJDQzl//jz16tVzuJHJkyczbdo0vL29adCggUMjfxERcR2Hj/MPDg5m27ZttG3blvHj\nx5ORkVHhOiEhIaxevRqANm3asHLlyuuvVkREnMKhaZ+jR48yevRofHx86N69OwUFBa6uS0REXMih\n8C8rK+PMmTNYLBYKCwvx8HBoNRERqaYcmvYZM2YMgwYNIj8/nwEDBjBx4kRX1yUiIi7kUPifOHGC\njz/+mDNnzlC3bl377RxFRKRmcmj+5vKO23r16in4RURuAA6N/G02GzExMYSGhtrn+2fPnu3SwkRE\nxHUqDP/58+czatQoxo4dy8mTJ2nYsKG76hIREReqcNonOzsbgE6dOvHOO+/QqVMn+z8REam5Kgx/\nwzB+87GIiNRsFYb/z3fuakeviMiNo8I5/z179jBw4EAMw+DAgQP2xxaLRZdrEBGpwSoM/7Vr17qr\nDhERcaMKw/+2225zVx0iIuJGukiPiIgJKfxFRExI4S8iYkIKfxERE1L4i4iYkMJfRMSEFP4iIiak\n8BcRMSGFv4iICSn8RURMSOEvImJCCn8RERNyWfjv3LmT2NhYAI4cOcKgQYMYPHgwKSkplJeXu6pZ\nERFxgEvCPyMjg+TkZKxWKwBpaWkkJC
SwfPlyDMMgKyvLFc2KiIiDXBL+TZo0IT093f58z5499vv+\ndunShW3btrmiWRERcVCF1/OvrKioKPLy8uzPL9/9C8Df35+CgoLf3YbVaiU3N7fSNTRpFoZ/Ld9r\nWiciIqLS7V1WdNHK0cPfXfd2rpUzaheR6uN68s8RLgn/X/Lw+P//YBQVFREUFPS76/j6+l53oDVL\n/OC61q+MwzPuVxCLyHWrbI44+qXhlqN9WrduTU5ODgCbN2+mQ4cO7mhWRESuwi3hP378eNLT0xkw\nYAAlJSVERUW5o1kREbkKl037hISEsHr1agBCQ0NZtmyZq5oSEZFrpJO8RERMSOEvImJCCn8RERNS\n+IuImJDCX0TEhBT+IiImpPAXETEhhb+IiAkp/J2suKSsqksQEfldbrmwm5n4eXtW2QXlREQcpZG/\niIgJKfxFRExI4S8iYkIKfxERE1L4i4iYkMJfRMSEFP4iIiak8BcRMSGFv4iICSn8RURMSOEvImJC\nCn8RERNS+IuImJDCX0TEhBT+IiImpPAXETEht97MJSYmhsDAQABCQkJIS0tzZ/MiIvJ/3Bb+VqsV\ngKVLl7qrSRERuQq3hf/evXu5ePEicXFxlJaW8swzz9CuXburvt9qtZKbm1vp9iIiIiq9rohIVbue\n/HOE28Lfz8+PYcOG0a9fPw4fPszw4cNZt24dXl6/XYKvr68CXERMq7L55+iXhtvCPzQ0lKZNm2Kx\nWAgNDaVOnTrk5+fTqFEjd5UgIiL/x21H+2RmZjJjxgwATp48SWFhITfffLO7mhcRkZ9x28i/b9++\nTJgwgUGDBmGxWJg+ffpVp3xERMS13Ja+Pj4+zJ49213NiYhIBXSSl4iICSn8RURMSOEvImJCCn8R\nERNS+IuImJDCX0TEhBT+IiImpPAXETEhhb+IiAkp/EVETEjhLyJiQgp/ERETUviLiJiQwl9ExIQU\n/iIiJqTwFxExIYW/iIgJKfxFRExI4S8iYkIKfxERE1L4i4iYkMJfRMSEFP4iIiak8BcRMSGFv4iI\nCXm5q6Hy8nImT57Mt99+i4+PD6mpqTRt2tRdzYuIyM+4beS/ceNGbDYbq1at4tlnn2XGjBnualpE\nRH7BbeH/5Zdf8j//8z8AtGvXjt27d7uraRER+QWLYRiGOxqaOHEiPXv2pGvXrgB069aNjRs34uX1\n2zNPX3/9Nb6+vu4oTUTkhmG1WmnXrt3vvs9tc/4BAQEUFRXZn5eXl181+AGHihcRkcpx27TPH//4\nRzZv3gxcGtWHh4e7q2kREfkFt037XD7aZ9++fRiGwfTp02nevLk7mhYRkV9wW/iLiEj1oZO8RERM\nSOEvImJCCn8RERNy26Ge1U1JSQlJSUkcO3YMm83GyJEjadGiBYmJiVgsFlq2bElKSgoeHub5fiwr\nKyM5OZlDhw7h6elJWloahmGYuk8AfvzxRx5++GEWL16Ml5eX6fsjJiaGwMBAAEJCQhgwYADPP/88\nnp6eREZG8tRTT1Vxhe712muvsWnTJkpKShg0aBCdOnWqGZ8Rw6QyMzON1NRUwzAM48yZM0bXrl2N\nJ5980sjOzjYMwzAmTZpkrF+/vipLdLsNGzYYiYmJhmEYRnZ2tjFixAjT94nNZjNGjRpl9OzZ0zhw\n4IDp+6O4uNh46KGHrnjtwQcfNI4cOWKUl5cbjz/+uLF79+4qqs79srOzjSeffNIoKyszCgsLjXnz\n5tWYz0g1/Dpyj+joaEaPHm1/7unpyZ49e+jUqRMAXbp0Ydu2bVVVXpW47777mDZtGgDHjx+nQYMG\npu+TmTNnMnDgQIKDgwFM3x979+7l4sWLxMXFMWTIEL744gtsNhtNmjTBYrEQGRnJ9u3bq7pMt/ns\ns88IDw8nPj6eESNG0K1btxrzGTFt+Pv7+xMQEEBhYSFPP/00CQkJGIaBxWKxLy8oKKjiKt3Py8uL\n8e
PHM23aNKKiokzdJ2vWrKFevXr2a1IBpu4PAD8/P4YNG8Ybb7zBlClTmDBhArVq1bIvN1ufnD17\nlt27dzN37lymTJnC2LFja8xnxLRz/gAnTpwgPj6ewYMH07t3b2bNmmVfVlRURFBQUBVWV3VmzpzJ\n2LFj6d+/P1ar1f662frkn//8JxaLhe3bt5Obm8v48eM5c+aMfbnZ+gMgNDSUpk2bYrFYCA0NJTAw\nkHPnztmXm61P6tSpQ1hYGD4+PoSFheHr68sPP/xgX16d+8O0I//Tp08TFxfHuHHj6Nu3LwCtW7cm\nJycHgM2bN9OhQ4eqLNHt3n33XV577TUAatWqhcVioW3btqbtk7fffptly5axdOlSIiIimDlzJl26\ndDFtfwBkZmbaL8d+8uRJLl68SO3atTl69CiGYfDZZ5+Zqk/uvPNOtmzZgmEY9v7o3LlzjfiMmPYM\n39TUVD766CPCwsLsr02cOJHU1FRKSkoICwsjNTUVT0/PKqzSvS5cuMCECRM4ffo0paWlDB8+nObN\nmzNp0iTT9sllsbGxTJ48GQ8PD1P3h81mY8KECRw/fhyLxcLYsWPx8PBg+vTplJWVERkZyZgxY6q6\nTLd64YUXyMnJwTAMxowZQ0hISI34jJg2/EVEzMy00z4iImam8BcRMSGFv4iICSn8RURMSOEvImJC\nCn8RERNS+IuImND/Ane2YbYti5r2AAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# `eve.age(months=True)` is used as a mapping; only its values (ages in months) are plotted.\n",
"ages_in_months = pd.Series(list(eve.age(months=True).values()))\n",
"age_ax = ages_in_months.to_frame(\"age in months\").plot.hist()\n",
"# Title the axes that the histogram was just drawn on.\n",
"age_ax.set_title(\"Age histogram\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"utterances = eve.utterances()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Question words: an utterance counts as a question when its first\n",
"# space-separated token, lower-cased, is one of these.\n",
"prefixes = [\"who\", \"where\", \"when\", \"what\", \"which\", \"why\", \"how\"]\n",
"\n",
"# Scan every pair of adjacent utterances and keep the ones where the mother (MOT)\n",
"# opens with a question word and the very next utterance belongs to the child (CHI).\n",
"# Each kept item is (position of the mother's utterance in `utterances`, question, reply).\n",
"# NOTE(review): if `utterances` concatenates several transcript files, a pair that\n",
"# straddles a file boundary joins two unrelated sessions -- confirm whether that matters.\n",
"sent_pairs = [\n",
"    (idx, question, reply)\n",
"    for idx, ((asker, question), (replier, reply)) in enumerate(zip(utterances, utterances[1:]))\n",
"    if asker == \"MOT\"\n",
"    and replier == \"CHI\"\n",
"    and question.split(\" \")[0].lower() in prefixes\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4948"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(sent_pairs)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(63, \"why don't you read Shadow yourself ?\", 'read Shadow .'),\n",
" (81, 'what is that ?', 'who (th)at ?'),\n",
" (119, \"why don't you read it to me ?\", 'there boots dog .'),\n",
" (140, 'how many do you have like this ?', 'two .'),\n",
" (145, 'where are you going ?', 'hit ball .'),\n",
" (147, 'how does a bunny+rabbit walk ?', 'a bunny+rabbit walk .'),\n",
" (151, 'what are you doing ?', 'sweep broom .'),\n",
" (169, 'what ?', 'get light .'),\n",
" (176, 'what is it ?', 'hear tractor .'),\n",
" (226, 'what kind of hat is that ?', 'Adam hat .')]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sent_pairs[:10]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>utt_id</th>\n",
" <th>mot_utterance</th>\n",
" <th>chi_utterance</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>63</td>\n",
" <td>why don't you read Shadow yourself ?</td>\n",
" <td>read Shadow .</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>81</td>\n",
" <td>what is that ?</td>\n",
" <td>who (th)at ?</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>119</td>\n",
" <td>why don't you read it to me ?</td>\n",
" <td>there boots dog .</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>140</td>\n",
" <td>how many do you have like this ?</td>\n",
" <td>two .</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>145</td>\n",
" <td>where are you going ?</td>\n",
" <td>hit ball .</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" utt_id mot_utterance chi_utterance\n",
"0 63 why don't you read Shadow yourself ? read Shadow .\n",
"1 81 what is that ? who (th)at ?\n",
"2 119 why don't you read it to me ? there boots dog .\n",
"3 140 how many do you have like this ? two .\n",
"4 145 where are you going ? hit ball ."
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tabulate the (index, mother question, child reply) triples, one row per pair.\n",
"pair_columns = [\"utt_id\", \"mot_utterance\", \"chi_utterance\"]\n",
"df = pd.DataFrame(sent_pairs, columns=pair_columns)\n",
"# Preview the first rows only; the full table is long.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Write the question/answer table to a CSV file (path is relative to the working directory).\n",
"# index=True only spells out the pandas default: the row index is written as an\n",
"# unnamed first column in addition to `utt_id`.\n",
"df.to_csv(\"questions_answers.csv\", index=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment