Created
February 17, 2016 13:40
-
-
Save hbredin/46669147d8d39a5ad224 to your computer and use it in GitHub Desktop.
Cooccurrence matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Here is a simple example of how to compute a speaker/name cooccurrence matrix." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"source": [ | |
"%pylab inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from pyannote.core import Segment, Annotation\n", | |
"from xarray import DataArray" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Speech turns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAABHAAAACxCAYAAACsjUrYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADTxJREFUeJzt3X+s73VdB/Dni64BV4YtjaTLj3TOQVtSyACFktJCE7my\nWSqVrlr2RyVT1y+24rqV859wrPEPgygbaRO9oLKCEm8rJnhDQFTMzFS4IWhWQGwq8OqP88GuxOF8\nv5dz7+d9Do/Hdvb9sfc5e96z9z7f73ne9/v9re4OAAAAAOM6aO4AAAAAADwxBQ4AAADA4BQ4AAAA\nAINT4AAAAAAMToEDAAAAMDgFDgAAAMDgFDgAAAAAg1PgAAAAAAxOgQMAAAAwOAUOAAAAwOAUOAAA\nAACDU+AAAAAADG7TFThVtauqvl5VT5s7yxyq6otV9WBV3VdV/1FVH6qqbXPnmkNVnVtVu6vq/qra\nU1XXVNVpc+cCAACAZW2qAqeqjk1ycpJ7k5w9c5y5dJJXdvfhSY7Myu/iT+aNdOBV1VuTXJjkD5Mc\nkeSYJBcnedWcuQAAAGBfVHfPnWHdVNXvJzkpyU1JXtTdT7k/1qvq35L8SndfPz1+RZJ3dfdx8yY7\ncKrq8CR7kryxuz8wdx4AAAB4sras1w/as+3odWuCtu25s/bxW9+Q5IIku5PsqKrv6+6vrleuRZx6\nwbXr8nu48e1n7uvv4NuqamuS1yb52JNPtLjtO89at7lw9Tkf3pffw4uSHJzkqvXKAQAAAHNatwJn\nblV1epJtST7Y3Q9U1aeTnJvkonmTzeKqqnooyWFZ2UJ15sx5DrRnJvladz8ydxAAAABYD5tmC1VV\nXZLkiO5+9fT4/CSv6e4T5012YE1bqH65uz9aVZXk1UkuTXJ8d987b7oDo6rOTPKhJIcocQAAANgM\nNsUhxlV1SJKfS/KTVXV3Vd2d5G1JTqiqH5433SwqSXrFziQPJzl93kgH1MeSfCMr5RUAAABseJui\nwElyTpKHkhyf5ITp6/gk/5DkjTPmml1VbU/yPUnumDvLgdLd92XlLKSLq2p7VR1aVVuq6uVV9c65\n8wEAAMCyNsUWqqr66yS3d/dvP+b5n83KGThHPVW20kxbqI7IyqqbTvKlJO/o7vfOGmwGVfX6JG9N\nclyS+5PcnOSPuvvGWYMBAADAkjZFgQMAAACwmW2WLVQAAAAAm5YCBwAAAGBwChwAAACAwSlwAAAA\nAAanwAEAAAAYnAIHAAAAYHBbFh1YVT5vHAAAAGCddXetNWbhAmf6gfuehqeUHTt2ZMeOHXPHYAMw\nV1iG+cKizBWWYb6wKHOFZZgvLKpqze4miS1UAAAAAMNT4AAAAAAMToHDfnHGGWfMHYENwlxhGeYL\nizJXWIb5wqLMFZZhvrDeatFzbaqqnYEDAAAAsH6qaqFDjK3AAQAAABicAgcAAABgcAocAAAAgMEp\ncAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDB\nKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACA\nwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAA\ngMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAA\nAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwA\nAACAwSlwAAAAAAanwGHd7dl2dPZsO3ruGGwAp15wbU694Nq5Y7BBuLawKHOFZWzfeVa27zxr7hhs\nAOYKy/BaxKKWmScKHAAAAIDBKXAAAAAABqfA
AQAAABicAgcAAABgcFuWGewQJmB/cJAxi3j/dOu1\nCNgfHE4L7A/et7CerMABAAAAGNxSK3C27blzf+VgE9Eys6wb337m3BHYAPZcunLrtYi1eB1iX1x9\nzofnjsDgrNJiX3jfwlp8jDgAAADAJqLAAQAAABicAgcAAABgcAocAAAAgMFVdy82sKoXHQsAAADA\n2qoq3V1rjbMCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAA\nBqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAA\nAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAA\nAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAA\nAAAABqfAAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGp8ABAAAAGJwCBwAAAGBwChwAAACAwSlw\nAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABqfAAQAAABicAgcAAABgcAoc9otdu3bN\nHYENwlxhGeYLizJXWIb5wqLMFZZhvrDeFDjsFy5WLMpcYRnmC4syV1iG+cKizBWWYb6w3hQ4AAAA\nAINT4AAAAAAMrrp7sYFViw0EAAAAYGHdXWuNWbjAAQAAAGAetlABAAAADE6BAwAAADC4hQqcqnp5\nVX22qj5XVb+zv0OxcVXVZVV1T1V9cu4sjK2qjqqq66vq01V1e1W9ee5MjKmqDq6qm6rqlmm+vGPu\nTIyvqg6qqk9U1QfnzsLYquqLVXXbdI35+Nx5GFdVPaOq3ldVd0yvR6fMnYkxVdXzp2vKJ6bb//Ze\nl9VU1e9N15RPVtUVVfXdq45d6wycqjooyeeSvDTJvyfZneR13f3Z9QzN5lBVpyd5IMm7u/sFc+dh\nXFX17CTP7u5bq+qwJDcn2e7awuOpqq3d/WBVfVeSG5K8rbtvmDsX46qqtyR5YZLDu/vsufMwrqr6\nQpIXdvd/zp2FsVXVnyX5++6+vKq2JNna3ffNHIvBTX9P35XklO6+c+48jKWqjk3y0STHdfc3q+qv\nklzT3e9+vPGLrMA5Ocm/dPeXuvtbSd6bZPu6JWZT6e5/TOINEGvq7q90963T/QeS3JFk27ypGFV3\nPzjdPTgrr12uM6yqqo5K8jNJLp07CxtCxbECrKGqDk/yY919eZJ090PKGxb0siT/qrxhFfcl+WaS\npz9aDGdl4czjWuTFaluSvSfbXfFHFrCOquoHk/xIkpvmTcKopu0wtyT5SpJd3f2ZuTMxtHcl+a0k\nPmqTRXSSv62q3VX1q3OHYVjPSfK1qrp82hZzSVUdOncoNoTXJnnP3CEY07T684+TfDnJniT/1d1/\nt9p4/9sAzGraPnVlkvOmlTjw/3T3I939o0mOSvLjVfWSuTMxpqp6ZZJ7phV+NX3BEzmtu0/Myqqt\nX5+2g8NjbUlyYpKLp/nyYJLfnTcSo6uqpyU5O8n75s7CmKrquUnekuTYJD+Q5LCqOne18YsUOHuS\nHLPX46Om5wCelGmZ4JVJ/qK7r547D+Oblqtfk+SkubMwrNOSnD2da/KeJD9RVY+7jxySpLvvnm6/\nmmRnVo4PgMe6K8md3f1P0+Mrs1LowBN5RZKbp+sLPJ6TktzQ3V/v7oeTfCDJi1cbvEiBszvJ86rq\n2Ok05Ncl8YkOPBH/48mi/jTJZ7r7ormDMK6qelZVPWO6f2iSn0py67ypGFV3n9/dx3T3c7PynuX6\n7n7D3LkYU1VtnVaCpqqenuSnk3xq3lSMqLvvSXJnVT1/euqlSWznZS2vj+1TPLF/TnJqVR1SVZWV\na8sdqw3e
stZP6+6Hq+o3klyXlcLnsu5e9Qfy1FZVf5nkjCTPrKovJ7ng0cPeYG9VdVqSn09y+3S2\nSSc5v7v/Zt5kDOjIJH8+vagdlJUVWx+ZOROwOXx/kp1V1Vl5X3xFd183cybG9eYkV0zbYr6Q5Jdm\nzsPAqmprVg4wftPcWRhXd982rRS+OcnDSW5Jcslq49f8GHEAAAAA5uUQYwAAAIDBKXAAAAAABqfA\nAQAAABicAgcAAABgcAocAAAAgMEpcAAAAAAGt2XuAAAAa6mq703ykSSd5MgkDye5N0kl+Z/uPn3G\neAAA+11199wZAAAWVlV/kOSB7r5w7iwAAAeKLVQAwEZT3/Gg6v7p9iVVtauqrqqqz1fVO6vqF6rq\n41V1W1U9Zxr3rKq6sqpumr5ePMc/AgBgGQocAGCj23s58QuSvCnJDyX5xSTP6+6Tk1yW5DenMRcl\nubC7T0nymiSXHsCsAAD7xBk4AMBmsru7702Sqvp8kmun529PcsZ0/2VJjq+qR1fyHFZVW7v7wQOa\nFABgCQocAGAz+cZe9x/Z6/Ej+b/3PZXklO7+1oEMBgDwZNhCBQBsdLX2kO9wXZLzvv3NVSesbxwA\ngPWnwAEANrrVPlJztefPS3LSdLDxp5L82v6JBQCwfnyMOAAAAMDgrMABAAAAGJwCBwAAAGBwChwA\nAACAwSlwAAAAAAanwAEAAAAYnAIHAAAAYHAKHAAAAIDBKXAAAAAABve/j6+1C34XlU4AAAAASUVO\nRK5CYII=\n", | |
"text/plain": [ | |
"<pyannote.core.annotation.Annotation at 0x7fda66344e10>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"speech_turns = Annotation()\n", | |
"speech_turns[Segment(0, 1)] = 'A'\n", | |
"speech_turns[Segment(2, 3)] = 'B'\n", | |
"speech_turns[Segment(3, 4)] = 'A'\n", | |
"speech_turns[Segment(5, 6)] = 'C'\n", | |
"speech_turns[Segment(7, 8)] = 'A'\n", | |
"speech_turns" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Pronounced names" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAABHAAAACxCAYAAACsjUrYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADB9JREFUeJzt3V+M5XdZx/HPs64gbQMJNhFobQEJ8V9QoWkNRSmCBjUp\nTdQI/mnChfUCpWmMQXtRNyQYb8AQ482GWq0pYFqlGEm0Sq1JGy21tKWFIlQQWmqLqFhqE8Hu48Wc\n6oKdnTPds/t7Zni9ksmcOfntmWc335w5+57v73equwMAAADAXAeWHgAAAACAYxNwAAAAAIYTcAAA\nAACGE3AAAAAAhhNwAAAAAIYTcAAAAACGE3AAAAAAhhNwAAAAAIYTcAAAAACGE3AAAAAAhhNwAAAA\nAIYTcPaRqvp0Vf3Q0nMAAAAAmyXgAAAAAAwn4AAAAAAMt68CTlW9paruq6pHquqeqrpo6ZkWcG5V\nfbSq/rWqrqyqpy09EAAAAHB8qrs38kCHL7p6Mw+U5JLrL66n8ueq6ieS3NzdD1fVTyW5Ksm3dffD\nm5rtmN56YGP/BrniyK7/Darq00m+lOS1SR5L8mdJbuzuKzY2FwAAAHDS7asdON39x0/Emu6+Nskn\nk5y77FQn3e9094Pd/cUkb0vyhqUHAgAAAI7PwU090FPdNbNJVXVxksuSPH9116lJTj9pAzyFXTMn\nwANH3f5MkuctNQgAAACwGRsLOEurqrOSHE7yqu7+29V9dySZEFVOpm896vbZSR5cahAAAABgM/bT\nKVSnJjmS5AtVdaCq3pjkuxeeaQlvqqozqurZSS5P8t6lBwIAAACOz74JON19b5K3J/m7JA8l+a4k\nNy861MnXSd6d5IYk92XrGkBvW3QiAAAA4Lht7F2oAAAAADgx9s0OHAAAAID9SsABAAAAGE7AAQAA\nABhOwAEAAAAYTsABAAAAGE7AAQAAABju4LoHVpX3GwcAAADYsO6unY5ZO+CsHvCpT8PXlUOHDuXQ\noUNLj8EeYK2wG9YL67JW2A3rhXVZK+yG9cK6qnZsN0mcQgUAAAAwnoADAAAAMJyAwwlxwQUXLD0C\ne4S1wm5YL6zLWmE3rBfWZa2wG9YLm1brXtemqto1cAAAAAA2p6rWuoixHTgAAAAAwwk4AAAAAMMJ\nOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4\nAAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgA\nAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAA\nAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAA\nAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDCTgAAAAA\nwwk4AAAAAMONDDiHL7o6hy+6et9+PwDYmLce2PrYr98PTjCvOwHYK7wCAwAAABhOwAEAAAAYTsAB\nAAAAGE7AAQAAABju4NIDHIsLvAHAmlxYGI6L150ATOfVHgAAAMBwo3fgXHL9xSfl+/iNCwB73hVH\nTs73sdOHfcrrTgCm8yoMAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYLjq7vUOrOp1jwUAAABgZ1WV\n7q6djrMDBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcA\nAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAA\nAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcA
AABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAA\nYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABg\nOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBBwAAAGA4\nAQcAAABgOAEHAAAAYDgBBwAAAGA4AQcAAABgOAEHAAAAYDgBhxPipptuWnoE9ghrhd2wXliXtcJu\nWC+sy1phN6wXNk3A4YTwZMW6rBV2w3phXdYKu2G9sC5rhd2wXtg0AQcAAABgOAEHAAAAYLjq7vUO\nrFrvQAAAAADW1t210zFrBxwAAAAAluEUKgAAAIDhBBwAAACA4dYKOFX12qr6eFV9oqrecqKHYu+q\nqiur6uGq+sjSszBbVZ1ZVTdW1Uer6u6qevPSMzFTVT29qm6tqjtW6+U3l56J+arqQFV9uKr+dOlZ\nmK2q/qmq7lo9x3xo6XmYq6qeVVXXVtW9q59H5y09EzNV1YtXzykfXn3+D6912U5V/frqOeUjVXVN\nVT1t22N3ugZOVR1I8okkr07yYJLbkry+uz++yaHZH6rqFUkeTXJ1d79k6XmYq6qek+Q53X1nVZ2W\n5PYkr/PcwpOpqlO6+7Gq+oYktyT5le6+Zem5mKuqLkvysiTP7O4Ll56HuarqU0le1t3/vvQszFZV\nv5/kb7r7qqo6mOSU7n5k4bEYbvX/6QeSnNfd9y89D7NU1dlJ/jrJt3f3l6vqj5J8oLuvfrLj19mB\nc26ST3b3Z7r7K0nem+R1G5uYfaW7b07iBRA76u6HuvvO1e1Hk9yb5Ixlp2Kq7n5sdfPp2frZ5XmG\nbVXVmUl+LMm7lp6FPaHisgLsoKqemeQHuvuqJOnu/xZvWNNrkvyjeMM2Hkny5SSnPhGGs7Vx5kmt\n88PqjCRHL7YH4j9ZwAZV1fOTfG+SW5edhKlWp8PckeShJDd198eWnonRfjvJrybxVpuso5P8ZVXd\nVlW/sPQwjPWCJF+oqqtWp8UcrqpnLD0Ue8JPJ3nP0kMw02r359uTfDbJ55J8sbv/arvj/bYBWNTq\n9Knrkly62okD/093H+nu70tyZpIfrKpXLj0TM1XVjyd5eLXDr1YfcCznd/dLs7Vr602r08Hhax1M\n8tIkv7taL48l+bVlR2K6qvrGJBcmuXbpWZipql6Y5LIkZyd5XpLTqupntjt+nYDzuSRnHfX1mav7\nAI7LapvgdUn+sLvfv/Q8zLfarv6BJOcsPQtjnZ/kwtV1Td6T5FVV9aTnkUOSdPc/rz7/S5L3Zevy\nAfC1Hkhyf3f//err67IVdOBYfjTJ7avnF3gy5yS5pbv/rbsfT/InSV6+3cHrBJzbkryoqs5eXQ35\n9Um8owPH4jeerOv3knysu9+59CDMVVWnV9WzVrefkeSHk9y57FRM1d2Xd/dZ3f3CbL1mubG7L156\nLmaqqlNWO0FTVacm+ZEk9yw7FRN198NJ7q+qF6/uenUSp/OykzfE6VMc2z8k+f6q+qaqqmw9t9y7\n3cEHd3q07n68qn4pyQ3ZCj5Xdve2D8jXt6p6d5ILknxzVX02yW88cbE3OFpVnZ/kZ5Pcvbq2SSe5\nvLv/fNnJGOi5Sf5g9UPtQLZ2bH1w4ZmA/eFbkryvqjpbr4uv6e4bFp6Jud6c5JrVaTGfSvLGhedh\nsKo6JVsXML5k6VmYq7vvWu0Uvj3J40nuSHJ4u+N3fBtxAAAAAJblIsYAAAAAwwk4AAAAAMMJOAAA\nAADDCTgAAAAAwwk4AAAAAMMJOAAAAADDHVx6AACAnVTVs5N8MEkneW6Sx5N8Pkkl+c/ufsWC4wEA\nnHDV3UvPAACwtqq6Ismj3f2OpWcBADhZnEIFAOw19VVfVH1p9fmVVXVTVV1fVfdV1W9V1c9V1Yeq\n6q6qesHq
uNOr6rqqunX18fIl/hIAALsh4AAAe93R24lfkuSSJN+Z5OeTvKi7z01yZZJfXh3zziTv\n6O7zkvxkknedxFkBAJ4S18ABAPaT27r780lSVfcl+YvV/XcnuWB1+zVJvqOqntjJc1pVndLdj53U\nSQEAdkHAAQD2k/866vaRo74+kv973VNJzuvur5zMwQAAjodTqACAva52PuSr3JDk0v/9w1Xfs9lx\nAAA2T8ABAPa67d5Sc7v7L01yzurCxvck+cUTMxYAwOZ4G3EAAACA4ezAAQAAABhOwAEAAAAYTsAB\nAAAAGE7AAQAAABhOwAEAAAAYTsABAAAAGE7AAQAAABhOwAEAAAAY7n8AMp1gbWmyoB4AAAAASUVO\nRK5CYII=\n", | |
"text/plain": [ | |
"<pyannote.core.annotation.Annotation at 0x7fda66390e90>" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pronounced_names = Annotation()\n", | |
"pronounced_names[Segment(0.2, 0.4)] = 'a'\n", | |
"pronounced_names[Segment(3.5, 3.7)] = 'b'\n", | |
"pronounced_names[Segment(5.0, 5.2)] = 'a'\n", | |
"pronounced_names" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Cooccurrence matrix" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We load speech turns and pronounced names in a convenient data structure" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('A', ['a']), ('B', []), ('A', ['b']), ('C', ['a']), ('A', [])]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data = []\n", | |
"for segment, _, speaker in speech_turns.itertracks(label=True):\n", | |
" names = [name for _, _, name in pronounced_names.crop(segment).itertracks(label=True)]\n", | |
" data.append((speaker, names))\n", | |
"data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"First speech turn is from speaker **A** and contains pronounced name **a**. \n", | |
"Second speech turn is from speaker **B** and does not contain any pronounced name. \n", | |
"... etc ..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"speakers = sorted(speech_turns.labels())\n", | |
"names = sorted(pronounced_names.labels())\n", | |
"n_speakers = len(speakers)\n", | |
"n_names = len(names)\n", | |
"counts = DataArray(np.zeros((n_speakers, n_names)), coords=[('speaker', speakers), ('name', names)])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# for each speech turn\n", | |
"for i, (_, names) in enumerate(data):\n", | |
" # i is the index of the current speech turn\n", | |
" # names contain all names pronounced in current speech turn\n", | |
" \n", | |
" if i > 0:\n", | |
" # get the speaker of the previous speech turn (i-1)\n", | |
" previous_speaker = data[i-1][0]\n", | |
" # increment coocurrence matrix accordingly\n", | |
" for name in names:\n", | |
" counts.loc[previous_speaker, name] += 1\n", | |
"\n", | |
" if i < len(data)-1:\n", | |
" # get the speaker of the next speech turns (i+1)\n", | |
" next_speaker = data[i+1][0]\n", | |
" # increment cooccurrence matrix accordingly\n", | |
" counts.loc[next_speaker, names] += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<xarray.DataArray (speaker: 3, name: 2)>\n", | |
"array([[ 2., 0.],\n", | |
" [ 1., 1.],\n", | |
" [ 0., 1.]])\n", | |
"Coordinates:\n", | |
" * speaker (speaker) |S1 'A' 'B' 'C'\n", | |
" * name (name) |S1 'a' 'b'" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"counts" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Name **a** is pronounced twice in the neighborhood of speaker **A**. \n", | |
"Name **a** is pronounced once in the neighborhood of speaker **B**. \n", | |
"Name **b** is pronounced once in the neighborhood of speaker **B**. \n", | |
"Name **b** is pronounced once in the neighborhood of speaker **C**. " | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment