Skip to content

Instantly share code, notes, and snippets.

@hbredin
Created February 17, 2016 13:40
Show Gist options
  • Save hbredin/46669147d8d39a5ad224 to your computer and use it in GitHub Desktop.
Save hbredin/46669147d8d39a5ad224 to your computer and use it in GitHub Desktop.
Cooccurrence matrix
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is a simple example of how to compute a speaker/name cooccurrence matrix."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"from pyannote.core import Segment, Annotation\n",
"from xarray import DataArray"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Speech turns"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABHAAAACxCAYAAACsjUrYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADTxJREFUeJzt3X+s73VdB/Dni64BV4YtjaTLj3TOQVtSyACFktJCE7my\nWSqVrlr2RyVT1y+24rqV859wrPEPgygbaRO9oLKCEm8rJnhDQFTMzFS4IWhWQGwq8OqP88GuxOF8\nv5dz7+d9Do/Hdvb9sfc5e96z9z7f73ne9/v9re4OAAAAAOM6aO4AAAAAADwxBQ4AAADA4BQ4AAAA\nAINT4AAAAAAMToEDAAAAMDgFDgAAAMDgFDgAAAAAg1PgAAAAAAxOgQMAAAAwOAUOAAAAwOAUOAAA\nAACDU+AAAAAADG7TFThVtauqvl5VT5s7yxyq6otV9WBV3VdV/1FVH6qqbXPnmkNVnVtVu6vq/qra\nU1XXVNVpc+cCAACAZW2qAqeqjk1ycpJ7k5w9c5y5dJJXdvfhSY7Myu/iT+aNdOBV1VuTXJjkD5Mc\nkeSYJBcnedWcuQAAAGBfVHfPnWHdVNXvJzkpyU1JXtTdT7k/1qvq35L8SndfPz1+RZJ3dfdx8yY7\ncKrq8CR7kryxuz8wdx4AAAB4sras1w/as+3odWuCtu25s/bxW9+Q5IIku5PsqKrv6+6vrleuRZx6\nwbXr8nu48e1n7uvv4NuqamuS1yb52JNPtLjtO89at7lw9Tkf3pffw4uSHJzkqvXKAQAAAHNatwJn\nblV1epJtST7Y3Q9U1aeTnJvkonmTzeKqqnooyWFZ2UJ15sx5DrRnJvladz8ydxAAAABYD5tmC1VV\nXZLkiO5+9fT4/CSv6e4T5012YE1bqH65uz9aVZXk1UkuTXJ8d987b7oDo6rOTPKhJIcocQAAANgM\nNsUhxlV1SJKfS/KTVXV3Vd2d5G1JTqiqH5433SwqSXrFziQPJzl93kgH1MeSfCMr5RUAAABseJui\nwElyTpKHkhyf5ITp6/gk/5DkjTPmml1VbU/yPUnumDvLgdLd92XlLKSLq2p7VR1aVVuq6uVV9c65\n8wEAAMCyNsUWqqr66yS3d/dvP+b5n83KGThHPVW20kxbqI7IyqqbTvKlJO/o7vfOGmwGVfX6JG9N\nclyS+5PcnOSPuvvGWYMBAADAkjZFgQMAAACwmW2WLVQAAAAAm5YCBwAAAGBwChwAAACAwSlwAAAA\nAAanwAEAAAAYnAIHAAAAYHBbFh1YVT5vHAAAAGCddXetNWbhAmf6gfuehqeUHTt2ZMeOHXPHYAMw\nV1iG+cKizBWWYb6wKHOFZZgvLKpqze4miS1UAAAAAMNT4AAAAAAMToHDfnHGGWfMHYENwlxhGeYL\nizJXWIb5wqLMFZZhvrDeatFzbaqqnYEDAAAAsH6qaqFDjK3AAQAAABicAgcAAABgcAocAAAAgMEp\ncAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDB\nKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACA\nwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAA\ngMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAA\nAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwA\nAACAwSlwAAAAAAanwGHd7dl2dPZsO3ruGGwAp15wbU694Nq5Y7BBuLawKHOFZWzfeVa27zxr7hhs\nAOYKy/BaxKKWmScKHAAAAIDBKXAAAAAABqfA
AQAAABicAgcAAABgcFuWGewQJmB/cJAxi3j/dOu1\nCNgfHE4L7A/et7CerMABAAAAGNxSK3C27blzf+VgE9Eys6wb337m3BHYAPZcunLrtYi1eB1iX1x9\nzofnjsDgrNJiX3jfwlp8jDgAAADAJqLAAQAAABicAgcAAABgcAocAAAAgMFVdy82sKoXHQsAAADA\n2qoq3V1rjbMCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAA\nBqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAA\nAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAA\nAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAA\nAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlw\nAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAoc9otdu3bN\nHYENwlxhGeYLizJXWIb5wqLMFZZhvrDeFDjsFy5WLMpcYRnmC4syV1iG+cKizBWWYb6w3hQ4AAAA\nAINT4AAAAAAMrrp7sYFViw0EAAAAYGHdXWuNWbjAAQAAAGAetlABAAAADE6BAwAAADC4hQqcqnp5\nVX22qj5XVb+zv0OxcVXVZVV1T1V9cu4sjK2qjqqq66vq01V1e1W9ee5MjKmqDq6qm6rqlmm+vGPu\nTIyvqg6qqk9U1QfnzsLYquqLVXXbdI35+Nx5GFdVPaOq3ldVd0yvR6fMnYkxVdXzp2vKJ6bb//Ze\nl9VU1e9N15RPVtUVVfXdq45d6wycqjooyeeSvDTJvyfZneR13f3Z9QzN5lBVpyd5IMm7u/sFc+dh\nXFX17CTP7u5bq+qwJDcn2e7awuOpqq3d/WBVfVeSG5K8rbtvmDsX46qqtyR5YZLDu/vsufMwrqr6\nQpIXdvd/zp2FsVXVnyX5++6+vKq2JNna3ffNHIvBTX9P35XklO6+c+48jKWqjk3y0STHdfc3q+qv\nklzT3e9+vPGLrMA5Ocm/dPeXuvtbSd6bZPu6JWZT6e5/TOINEGvq7q90963T/QeS3JFk27ypGFV3\nPzjdPTgrr12uM6yqqo5K8jNJLp07CxtCxbECrKGqDk/yY919eZJ090PKGxb0siT/qrxhFfcl+WaS\npz9aDGdl4czjWuTFaluSvSfbXfFHFrCOquoHk/xIkpvmTcKopu0wtyT5SpJd3f2ZuTMxtHcl+a0k\nPmqTRXSSv62q3VX1q3OHYVjPSfK1qrp82hZzSVUdOncoNoTXJnnP3CEY07T684+TfDnJniT/1d1/\nt9p4/9sAzGraPnVlkvOmlTjw/3T3I939o0mOSvLjVfWSuTMxpqp6ZZJ7phV+NX3BEzmtu0/Myqqt\nX5+2g8NjbUlyYpKLp/nyYJLfnTcSo6uqpyU5O8n75s7CmKrquUnekuTYJD+Q5LCqOne18YsUOHuS\nHLPX46Om5wCelGmZ4JVJ/qK7r547D+Oblqtfk+SkubMwrNOSnD2da/KeJD9RVY+7jxySpLvvnm6/\nmmRnVo4PgMe6K8md3f1P0+Mrs1LowBN5RZKbp+sLPJ6TktzQ3V/v7oeTfCDJi1cbvEiBszvJ86rq\n2Ok05Ncl8YkOPBH/48mi/jTJZ7r7ormDMK6qelZVPWO6f2iSn0py67ypGFV3n9/dx3T3c7PynuX6\n7n7D3LkYU1VtnVaCpqqenuSnk3xq3lSMqLvvSXJnVT1/euqlSWznZS2vj+1TPLF/TnJqVR1SVZWV\na8sdqw3e
stZP6+6Hq+o3klyXlcLnsu5e9Qfy1FZVf5nkjCTPrKovJ7ng0cPeYG9VdVqSn09y+3S2\nSSc5v7v/Zt5kDOjIJH8+vagdlJUVWx+ZOROwOXx/kp1V1Vl5X3xFd183cybG9eYkV0zbYr6Q5Jdm\nzsPAqmprVg4wftPcWRhXd982rRS+OcnDSW5Jcslq49f8GHEAAAAA5uUQYwAAAIDBKXAAAAAABqfA\nAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGt2XuAAAAa6mq703ykSSd5MgkDye5N0kl+Z/uPn3G\neAAA+11199wZAAAWVlV/kOSB7r5w7iwAAAeKLVQAwEZT3/Gg6v7p9iVVtauqrqqqz1fVO6vqF6rq\n41V1W1U9Zxr3rKq6sqpumr5ePMc/AgBgGQocAGCj23s58QuSvCnJDyX5xSTP6+6Tk1yW5DenMRcl\nubC7T0nymiSXHsCsAAD7xBk4AMBmsru7702Sqvp8kmun529PcsZ0/2VJjq+qR1fyHFZVW7v7wQOa\nFABgCQocAGAz+cZe9x/Z6/Ej+b/3PZXklO7+1oEMBgDwZNhCBQBsdLX2kO9wXZLzvv3NVSesbxwA\ngPWnwAEANrrVPlJztefPS3LSdLDxp5L82v6JBQCwfnyMOAAAAMDgrMABAAAAGJwCBwAAAGBwChwA\nAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABve/j6+1C34XlU4AAAAASUVO\nRK5CYII=\n",
"text/plain": [
"<pyannote.core.annotation.Annotation at 0x7fda66344e10>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (start, end, speaker) of each speech turn, in chronological order\n",
"turns = [(0, 1, 'A'), (2, 3, 'B'), (3, 4, 'A'), (5, 6, 'C'), (7, 8, 'A')]\n",
"\n",
"speech_turns = Annotation()\n",
"for start, end, speaker in turns:\n",
"    speech_turns[Segment(start, end)] = speaker\n",
"speech_turns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Pronounced names"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABHAAAACxCAYAAACsjUrYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADB9JREFUeJzt3V+M5XdZx/HPs64gbQMJNhFobQEJ8V9QoWkNRSmCBjUp\nTdQI/mnChfUCpWmMQXtRNyQYb8AQ482GWq0pYFqlGEm0Sq1JGy21tKWFIlQQWmqLqFhqE8Hu48Wc\n6oKdnTPds/t7Zni9ksmcOfntmWc335w5+57v73equwMAAADAXAeWHgAAAACAYxNwAAAAAIYTcAAA\nAACGE3AAAAAAhhNwAAAAAIYTcAAAAACGE3AAAAAAhhNwAAAAAIYTcAAAAACGE3AAAAAAhhNwAAAA\nAIYTcPaRqvp0Vf3Q0nMAAAAAmyXgAAAAAAwn4AAAAAAMt68CTlW9paruq6pHquqeqrpo6ZkWcG5V\nfbSq/rWqrqyqpy09EAAAAHB8qrs38kCHL7p6Mw+U5JLrL66n8ueq6ieS3NzdD1fVTyW5Ksm3dffD\nm5rtmN56YGP/BrniyK7/Darq00m+lOS1SR5L8mdJbuzuKzY2FwAAAHDS7asdON39x0/Emu6+Nskn\nk5y77FQn3e9094Pd/cUkb0vyhqUHAgAAAI7PwU090FPdNbNJVXVxksuSPH9116lJTj9pAzyFXTMn\nwANH3f5MkuctNQgAAACwGRsLOEurqrOSHE7yqu7+29V9dySZEFVOpm896vbZSR5cahAAAABgM/bT\nKVSnJjmS5AtVdaCq3pjkuxeeaQlvqqozqurZSS5P8t6lBwIAAACOz74JON19b5K3J/m7JA8l+a4k\nNy861MnXSd6d5IYk92XrGkBvW3QiAAAA4Lht7F2oAAAAADgx9s0OHAAAAID9SsABAAAAGE7AAQAA\nABhOwAEAAAAYTsABAAAAGE7AAQAAABju4LoHVpX3GwcAAADYsO6unY5ZO+CsHvCpT8PXlUOHDuXQ\noUNLj8EeYK2wG9YL67JW2A3rhXVZK+yG9cK6qnZsN0mcQgUAAAAwnoADAAAAMJyAwwlxwQUXLD0C\ne4S1wm5YL6zLWmE3rBfWZa2wG9YLm1brXtemqto1cAAAAAA2p6rWuoixHTgAAAAAwwk4AAAAAMMJ\nOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4\nAAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgA\nAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAA\nAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAA\nAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAA\nwwk4AAAAAMONDDiHL7o6hy+6et9+PwDYmLce2PrYr98PTjCvOwHYK7wCAwAAABhOwAEAAAAYTsAB\nAAAAGE7AAQAAABju4NIDHIsLvAHAmlxYGI6L150ATOfVHgAAAMBwo3fgXHL9xSfl+/iNCwB73hVH\nTs73sdOHfcrrTgCm8yoMAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYLjq7vUOrOp1jwUAAABgZ1WV\n7q6djrMDBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcA\nAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAA\nAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcA
AABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAA\nYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABg\nOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4\nAQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBhxPipptuWnoE9ghrhd2wXliXtcJu\nWC+sy1phN6wXNk3A4YTwZMW6rBV2w3phXdYKu2G9sC5rhd2wXtg0AQcAAABgOAEHAAAAYLjq7vUO\nrFrvQAAAAADW1t210zFrBxwAAAAAluEUKgAAAIDhBBwAAACA4dYKOFX12qr6eFV9oqrecqKHYu+q\nqiur6uGq+sjSszBbVZ1ZVTdW1Uer6u6qevPSMzFTVT29qm6tqjtW6+U3l56J+arqQFV9uKr+dOlZ\nmK2q/qmq7lo9x3xo6XmYq6qeVVXXVtW9q59H5y09EzNV1YtXzykfXn3+D6912U5V/frqOeUjVXVN\nVT1t22N3ugZOVR1I8okkr07yYJLbkry+uz++yaHZH6rqFUkeTXJ1d79k6XmYq6qek+Q53X1nVZ2W\n5PYkr/PcwpOpqlO6+7Gq+oYktyT5le6+Zem5mKuqLkvysiTP7O4Ll56HuarqU0le1t3/vvQszFZV\nv5/kb7r7qqo6mOSU7n5k4bEYbvX/6QeSnNfd9y89D7NU1dlJ/jrJt3f3l6vqj5J8oLuvfrLj19mB\nc26ST3b3Z7r7K0nem+R1G5uYfaW7b07iBRA76u6HuvvO1e1Hk9yb5Ixlp2Kq7n5sdfPp2frZ5XmG\nbVXVmUl+LMm7lp6FPaHisgLsoKqemeQHuvuqJOnu/xZvWNNrkvyjeMM2Hkny5SSnPhGGs7Vx5kmt\n88PqjCRHL7YH4j9ZwAZV1fOTfG+SW5edhKlWp8PckeShJDd198eWnonRfjvJrybxVpuso5P8ZVXd\nVlW/sPQwjPWCJF+oqqtWp8UcrqpnLD0Ue8JPJ3nP0kMw02r359uTfDbJ55J8sbv/arvj/bYBWNTq\n9Knrkly62okD/093H+nu70tyZpIfrKpXLj0TM1XVjyd5eLXDr1YfcCznd/dLs7Vr602r08Hhax1M\n8tIkv7taL48l+bVlR2K6qvrGJBcmuXbpWZipql6Y5LIkZyd5XpLTqupntjt+nYDzuSRnHfX1mav7\nAI7LapvgdUn+sLvfv/Q8zLfarv6BJOcsPQtjnZ/kwtV1Td6T5FVV9aTnkUOSdPc/rz7/S5L3Zevy\nAfC1Hkhyf3f//err67IVdOBYfjTJ7avnF3gy5yS5pbv/rbsfT/InSV6+3cHrBJzbkryoqs5eXQ35\n9Um8owPH4jeerOv3knysu9+59CDMVVWnV9WzVrefkeSHk9y57FRM1d2Xd/dZ3f3CbL1mubG7L156\nLmaqqlNWO0FTVacm+ZEk9yw7FRN198NJ7q+qF6/uenUSp/OykzfE6VMc2z8k+f6q+qaqqmw9t9y7\n3cEHd3q07n68qn4pyQ3ZCj5Xdve2D8jXt6p6d5ILknxzVX02yW88cbE3OFpVnZ/kZ5Pcvbq2SSe5\nvLv/fNnJGOi5Sf5g9UPtQLZ2bH1w4ZmA/eFbkryvqjpbr4uv6e4bFp6Jud6c5JrVaTGfSvLGhedh\nsKo6JVsXML5k6VmYq7vvWu0Uvj3J40nuSHJ4u+N3fBtxAAAAAJblIsYAAAAAwwk4AAAAAMMJOAAA\nAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDHVx6AACAnVTVs5N8MEkneW6Sx5N8Pkkl+c/ufsWC4wEA\nnHDV3UvPAACwtqq6Ismj3f2OpWcBADhZnEIFAOw19VVfVH1p9fmVVXVTVV1fVfdV1W9V1c9V1Yeq\n6q6qesHq
uNOr6rqqunX18fIl/hIAALsh4AAAe93R24lfkuSSJN+Z5OeTvKi7z01yZZJfXh3zziTv\n6O7zkvxkknedxFkBAJ4S18ABAPaT27r780lSVfcl+YvV/XcnuWB1+zVJvqOqntjJc1pVndLdj53U\nSQEAdkHAAQD2k/866vaRo74+kv973VNJzuvur5zMwQAAjodTqACAva52PuSr3JDk0v/9w1Xfs9lx\nAAA2T8ABAPa67d5Sc7v7L01yzurCxvck+cUTMxYAwOZ4G3EAAACA4ezAAQAAABhOwAEAAAAYTsAB\nAAAAGE7AAQAAABhOwAEAAAAYTsABAAAAGE7AAQAAABhOwAEAAAAY7n8AMp1gbWmyoB4AAAAASUVO\nRK5CYII=\n",
"text/plain": [
"<pyannote.core.annotation.Annotation at 0x7fda66390e90>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# (start, end, name) of each occurrence of a pronounced name\n",
"mentions = [(0.2, 0.4, 'a'), (3.5, 3.7, 'b'), (5.0, 5.2, 'a')]\n",
"\n",
"pronounced_names = Annotation()\n",
"for start, end, name in mentions:\n",
"    pronounced_names[Segment(start, end)] = name\n",
"pronounced_names"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cooccurrence matrix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We load speech turns and pronounced names into a convenient data structure: one `(speaker, names)` pair per speech turn."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('A', ['a']), ('B', []), ('A', ['b']), ('C', ['a']), ('A', [])]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# one (speaker, [names pronounced during that turn]) pair per speech turn,\n",
"# in the order the turns are yielded by itertracks\n",
"data = [\n",
"    (speaker,\n",
"     [name for _, _, name in pronounced_names.crop(segment).itertracks(label=True)])\n",
"    for segment, _, speaker in speech_turns.itertracks(label=True)\n",
"]\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First speech turn is from speaker **A** and contains pronounced name **a**. \n",
"Second speech turn is from speaker **B** and does not contain any pronounced name. \n",
"... etc ..."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# row labels: every speaker label; column labels: every pronounced-name label\n",
"speakers = sorted(speech_turns.labels())\n",
"names = sorted(pronounced_names.labels())\n",
"n_speakers, n_names = len(speakers), len(names)\n",
"\n",
"# zero-initialized (speaker x name) table that can be indexed by label\n",
"counts = DataArray(\n",
"    np.zeros((n_speakers, n_names)),\n",
"    coords=[('speaker', speakers), ('name', names)])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# For each speech turn, credit every name it contains to the speaker of the\n",
"# previous turn and to the speaker of the next turn.\n",
"# The loop variable is named `turn_names` (not `names`) so that it does not\n",
"# overwrite the `names` list holding the column labels of `counts`.\n",
"for i, (_, turn_names) in enumerate(data):\n",
"    # i is the index of the current speech turn\n",
"    # turn_names contains all names pronounced in the current speech turn\n",
"\n",
"    if i > 0:\n",
"        # get the speaker of the previous speech turn (i-1)\n",
"        previous_speaker = data[i-1][0]\n",
"        # increment cooccurrence matrix accordingly\n",
"        for name in turn_names:\n",
"            counts.loc[previous_speaker, name] += 1\n",
"\n",
"    if i < len(data)-1:\n",
"        # get the speaker of the next speech turn (i+1)\n",
"        next_speaker = data[i+1][0]\n",
"        # increment once per pronounced name, exactly as for the previous\n",
"        # speaker; indexing with the whole list would count a name repeated\n",
"        # within the turn only once\n",
"        for name in turn_names:\n",
"            counts.loc[next_speaker, name] += 1"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<xarray.DataArray (speaker: 3, name: 2)>\n",
"array([[ 2., 0.],\n",
" [ 1., 1.],\n",
" [ 0., 1.]])\n",
"Coordinates:\n",
" * speaker (speaker) |S1 'A' 'B' 'C'\n",
" * name (name) |S1 'a' 'b'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"counts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
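"Name **a** is counted twice in the neighborhood of speaker **A**: it is pronounced in a speech turn that comes right after one of **A**'s turns and right before another. \n",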
"Name **a** is pronounced once in the neighborhood of speaker **B**. \n",
"Name **b** is pronounced once in the neighborhood of speaker **B**. \n",
"Name **b** is pronounced once in the neighborhood of speaker **C**. "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment