Created
March 29, 2016 18:31
-
-
Save karlnapf/e75cadb1dcd36ae2a8ca3fe11279b3f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python2.7/dist-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n", | |
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n" | |
] | |
} | |
], | |
"source": [ | |
"# this is at git commit: 6ef795ba6e83706b903dc84a43a36e496597c394 \n", | |
"from modshogun import *\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline\n", | |
"\n", | |
"\n", | |
"import time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# use scipy for generating samples\n", | |
"from scipy.stats import norm, laplace\n", | |
"\n", | |
"def sample_gaussian_vs_laplace(n=220, mu=0.0, sigma2=1, b=np.sqrt(0.5)): \n", | |
" # sample from both distributions\n", | |
" X=norm.rvs(size=n, loc=mu, scale=sigma2)\n", | |
" Y=laplace.rvs(size=n, loc=mu, scale=b)\n", | |
" \n", | |
" return X,Y\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(220,) (220,)\n", | |
"time taken: 20.2334070206\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEACAYAAACwB81wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEX9JREFUeJzt3X+MZWV9x/H3B1cWkLjZquwaFn80WAXTshpdNTRxWitC\n07CkaRA1qUpsTKnV1CaFNU122zQqTTSxafijimZLIHRromCwZUEybWwiWGULslu6pFnEDTvQilgE\nzVK+/eOeSS/jDnPn/ph7h+f9Sk72zHPPuc+XJ8Nnzn3u+ZGqQpL0/HfStAuQJK0NA1+SGmHgS1Ij\nDHxJaoSBL0mNMPAlqRErBn6SjUnuTHJ3kvuSfLJr35xkf5L7k9yaZFPfPruSHE5yKMkFk/wPkCQN\nJoOch5/ktKp6MskLgH8B/hi4GPjvqvrLJFcCm6vqqiTnAtcDbwa2AbcDrylP+JekqRpoSqeqnuxW\nN3b7PAbsBPZ27XuBS7r1i4Ebq+rpqjoCHAZ2jKtgSdJwBgr8JCcluRs4BsxX1UFgS1UtAFTVMeCM\nbvMzgYf6dj/atUmSpmjDIBtV1TPAG5K8GLg1yRywdIrGKRtJmmEDBf6iqvpxkq8DbwIWkmypqoUk\nW4FHus2OAmf17bata3uWJP6BkKQhVFWG3fE5F+ClwKZu/VTgn4F3AFcDV3btVwKf7tbPBe4GTgZe\nDTxA9+XwkvetlfqehQXYM+0arNM613Od66HGdVZnDbvvIEf4Lwf2Jgm9Of/rquob3Zz+viSXAw8C\nl3aVHEyyDzgIHAeuqK5KSdL0rBj4VXUv8MYTtP8Q+I1l9vkU8KmRq5MkjY1X2q5sftoFDGh+2gUM\naH7aBQxoftoFDGh+2gUMYH7aBQxoftoFTNpAF15NpOOkatgvHiSpUaNkp0f4ktQIA1+SGmHgS1Ij\nDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLA\nl6RGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDVi3QZ+cuqxJDXccuqxadcvSWstVTWd\njpOqqoyyPwxbexilb0mallGyc90e4UuSVmfFwE+yLckdSe5Lcm+SP+zadyf5QZLvdsuFffvsSnI4\nyaEkF0zyP0CSNJgVp3SSbAW2VtWBJKcD3wF2Au8G/qeqPrtk+3OAG4A3A9uA24HX1JKOnNKRpNWb\n6JROVR2rqgPd+hPAIeDMxb5PsMtO4MaqerqqjgCHgR3DFCdJGp9VzeEneRWwHbiza/pIkgNJvpBk\nU9d2JvBQ325H+f8/EJKkKRk48LvpnC8DH+uO9K8BfrGqtgPHgM9MpkRJ0jhsGGSjJBvohf11VXUT\nQFU92rfJ54GvdetHgbP6XtvWtZ3offf0/ThfVfMDVS1JjUgyB8yN5b0GOQ8/yd8C/1VVH+9r21pV\nx7r1PwLeXFXvTXIucD3wFnpTObfhl7aSNBajZOeKR/hJzgfeB9yb5G56KfsJ4L1JtgPPAEeADwNU\n1cEk+4CDwHHgiqVhL0lae15pK0nriFfaSpJWZOBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4\nktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhox0EPMJyXJB4fc9amx\nFiJJDZjqIw7hPT8Zbu9bNsCPN/qIQ0mtGeURh1MO/GH7Pu9xuGeTgS+pNT7TVpK0IgNfkhph4EtS\nIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1IgVAz/JtiR3JLkvyb1JPtq1b06yP8n9SW5Nsqlv\nn11JDic5lOSCSf4HSJIGM8gR/tPAx6vq9cDbgD9I8jrgKuD2qnotcAewCyDJucClwDnARcA1SbyN\ngSRN2YqBX1XHqupAt/4EcAjYBuwE9nab7QUu6dYvBm6sqqer6ghwGNgx5rolSau0qjn8JK8CtgPf\nArZU1QL0/igAZ3SbnQk81Lfb0a5NkjRFA98PP8npwJeBj1XVE727XT7LELeu3NO3PtctkqRFSeYY\nUzgOFPhJNtAL++uq6qaueSHJlqpaSLIVeKRrPwqc1bf7tq7tBPYMUbIktaOq5oH5xZ+T7B72vQad\n0vkicLCqPtfXdjPwgW79/cBNfe2XJTk5yauBs4G7hi1wMjaSpIZbTj027eolaRgrHuEnOR94H3Bv\nkrvpTd18Arga2JfkcuBBemfmUFUHk+wDDgLHgStqWk9ZWdbPGOHhKVvGWYkkrZVmn3jl07IkrUc+\n8UqStCIDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSB\nL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS\n1AgDX5IaYeBLUiMMfElqxIqBn+TaJAtJ7ulr253kB0m+2y0X9r22K8nhJIeSXDCpwiVJqzPIEf6X\ngHedoP2zVfXGbvlHgCTnAJcC5wAXAdckydiqlSQNbcXAr6pvAo+d4KUTBflO4MaqerqqjgCHgR0j\nVShJGotR5vA/kuRAki8k2dS1nQk81LfN0a5NkjRlG4bc7xrgz6uqkvwF8BngQ6t/mz1963PdIkla\nlGSOMYXjUIFfVY/2/fh54Gvd+lHgrL7XtnVty9gzTPeS1IyqmgfmF39OsnvY9xp0Sif0zdkn2dr3\n2m8D3+vWbwYuS3JyklcDZwN3DVucJGl8VjzCT3IDvY8TL0nyfWA38GtJtgPPAEeADwNU1cEk+4CD\nwHHgiqqqyZQuSVqNTCuPkxQM2/d5j8M9m4bfP4yyb1V5qqmkqUhSw2aQV9pKUiMMfElqhIEvSY0w\n8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANf\nkhph4EtSI4Z6iHnbNnZP6xrGKQtVT21deTtJGj8Df9V+xgiPR9wyzkokaTWc0pGkRhj4ktQIA1+S\nGmHgS1IjDHxJaoSBL0mNMPAlqREGviQ1wsCXpEasGPhJrk2ykOSevrbNSfYnuT/JrUk29b22K8nh\nJIeSXDCpwiVJqzPIEf6XgHctabsKuL2qXgvcAewCSHIucClwDnARcE2SjK9cSdKwVgz8qvom8NiS\n5p3A3m59L3BJt34xcGNVPV1VR4DDwI7xlCpJGsWwc/hnVNUCQFUdA87o2s8EHurb7mjXJkmasnHd\nLXPI20fu6Vuf6xZJ0qIkc4wpHIcN/IUkW6pqIclW4JGu/ShwVt9227q2ZewZsntJakNVzQPziz8n\n2T3sew06pZNuWXQz8IFu/f3ATX3tlyU5OcmrgbOBu4YtTpI0Pise4Se5gd7HiZck+T6wG/g08PdJ\nLgcepHdmDlV1MMk+4CBwHLiiqoZ9WogkaYwyrTzuPSZw2L7Pexzu2TTCk6eY1r5V5WmqkoaWpIbN\nEa+0laRGGPiS1AgDX5IaYeBLUiMMfElqhIEvSY0w8CWpEQa+JDXCwJekRhj4ktQIA1+SGmHgS1Ij\nDHxJaoSBL0mNMPAlqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWrEhmkX0JaNJKnh9z9loeqp\nreOrR1JLDPw19TNghLwnW8ZViaT2OKUjSY0w8CWpEQa+JDXCwJekRhj4ktSIkc7SSXIEeBx4Bjhe\nVTuSbAb+DnglcAS4tKoeH7FOSdKIRj3CfwaYq6o3VNWOru0q4Paqei1wB7BrxD4kSWMwauDnBO+x\nE9jbre8FLhmxD0nSGIwa+AXcluTbST7UtW2pqgWAqjoGnDFiH5KkMRj1Stvzq+rhJC8D9ie5n5+/\nlHSUS0slSWMyUuBX1cPdv48m+SqwA1hIsqWqFpJsBR5Z/h329K3PdYskaVGSOcYUjqka7gA8yWnA\nSVX1RJIXAfuBPwPeAfywqq5OciWwuaquOsH+NfzB/3mPwz2bht8/rL99e/tXVUZ4A0nrXJIaNgdG\nOcLfAnylu/vjBuD6qtqf5F+BfUkuBx4ELh2hDz3LKHfb9E6bUuuGPsIfuWOP8Ne8bz8dSOvfKEf4\nXmkrSY0w8CWpEQa+JDXCwJekRhj4ktQIn2nbDE/plFpn4DdjlAeo+/B06fnAKR1JaoSBL0mNMPAl\nqREGviQ1wsCXpEYY+JLUCANfkhph4EtSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5Ia\n4f3wNQAfniI9Hxj4GoAPT5GeDwx8Tdgonw7ATwjS+Bj4mrBRPh2AnxCk8fFLW0lqhIEvSY2YWOAn\nuTDJvyf5jyRXTqofPd/1vgMYbjn12LC9Jqcem0a/0iRNJPCTnAT8NfAu4PXAe5K8bhJ9Td78tAsY\n0Py0CxjQ/Cq3X/wOYKhly7ChDT/dMny/P12z7x2SzK1VX8NaDzXC+qlzFJM6wt8BHK6qB6vqOHAj\nsHNCfU3Y/LQLGND8tAsY0Pwa9jXKH4t1Y27aBQxgbtoFDGhu2gVM2qTO0jkTeKjv5x/Q+yMgNWDk\nU1GfgZ8OfDCWZHffvp7GqmVN+bTMX398uP0eOGW8dUjjNPKpqCcNvv+ebll0ypb1eFV073uPYafC\nhq97ab/P/uM5uX6nJVXj//ia5K3Anqq6sPv5KqCq6uq+bdbV52ZJmhVVlWH2m1TgvwC4H3gH8DBw\nF/Ceqjo09s4kSQOZyJROVf1vko8A++l9MXytYS9J0zWRI3xJ0uxZsyttk2xOsj/J/UluTbJpme2O\nJPm3JHcnuWsN61vxQrEkf5XkcJIDSbavVW1LanjOOpO8PcmPkny3W/50CjVem2QhyT3Psc0sjOVz\n1jkjY7ktyR1J7ktyb5KPLrPdVMdzkDpnZDw3Jrmzy5f7knxyme2mPZ4r1jnUeFbVmizA1cCfdOtX\nAp9eZrv/BDavVV1dnycBDwCvBF4IHABet2Sbi4BbuvW3AN9ayxpXUefbgZvXurYlNfwqsB24Z5nX\npz6WA9Y5C2O5FdjerZ9O77uxWfzdHKTOqY9nV8dp3b8vAL4FnD9r4zlgnasez7W8l85OYG+3vhe4\nZJntwtrf42eQC8V2An8LUFV3ApuSNb+T46AXtA31Df64VNU3gceeY5NZGMtB6oTpj+WxqjrQrT8B\nHKJ3nUu/qY/ngHXClMcToKqe7FY30suapb8DUx/Pru+V6oRVjudaBusZVbUAvV8O4IxltivgtiTf\nTvJ7a1TbiS4UW/rLunSboyfYZtIGqRPgbd1H0VuSnLs2pa3KLIzloGZmLJO8it4nkjuXvDRT4/kc\ndcIMjGeSk5LcDRwD5qvq4JJNZmI8B6gTVjmeYz1LJ8ltQP9fwtAL8BPNLS33bfH5VfVwkpfRC/5D\n3ZGYBvMd4BVV9WSSi4CvAr805ZrWq5kZyySnA18GPtYdQc+kFeqcifGsqmeANyR5MbA/ydur6p/W\nuo6VDFDnqsdzrEf4VfXOqvqVvuWXu39vBhYWPxYl2Qo8ssx7PNz9+yjwFdbmlgxHgVf0/byta1u6\nzVkrbDNpK9ZZVU8sfhSsqn8AXpjkF9auxIHMwliuaFbGMskGeiF6XVXddIJNZmI8V6pzVsazr54f\nA7cAb1ry0kyM56Ll6hxmPNdySudm4APd+vuBn/uFSHJad4RAkhcBFwDfW4Pavg2cneSVSU4GLuvq\n7Xcz8LtdbW8FfrQ4RbWGVqyzf64xyQ56p97+cG3L7HXP8vOLszCWi5atc4bG8ovAwar63DKvz8p4\nPmedszCeSV6a7gzBJKcC76R38kO/qY/nIHUOM55reS+dq4F9SS4HHgQuBUjycuDzVfVb9KaDvpLe\nbRc2ANdX1f5JF1bLXCiW5MO9l+tvqurrSX4zyQPAT4APTrquYeoEfifJ7wPHgaeAd691nUluoHfn\nwZck+T6wGziZGRrLQepkNsbyfOB9wL3dfG4Bn6B3ptbMjOcgdTID4wm8HNibZPHkkOuq6huz9v/6\nIHUyxHh64ZUkNcJHHEpSIwx8SWqEgS9JjTDwJakRBr4kNcLAl6RGGPiS1AgDX5Ia8X9e2zrHyUWB\nUAAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x7f6b285249d0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# plain performance test\n", | |
"\n", | |
"X,Y = sample_gaussian_vs_laplace()\n", | |
"print X.shape, Y.shape\n", | |
"\n", | |
"feats_p = RealFeatures(X.reshape(1,len(X)))\n", | |
"feats_q = RealFeatures(Y.reshape(1,len(Y)))\n", | |
"width=1\n", | |
"k = GaussianKernel(10, width) # should have a constructor that does not require to specify cache size\n", | |
"\n", | |
"mmd = QuadraticTimeMMD() # should take p and q as argument\n", | |
"mmd.set_p(feats_p)\n", | |
"mmd.set_q(feats_q)\n", | |
"mmd.set_kernel(k)\n", | |
"\n", | |
"# this should be able to receive a CSignal abort, made my Python stall a couple of times\n", | |
"#mmd.set_num_null_samples(10000)\n", | |
"\n", | |
"start = time.time()\n", | |
"mmd.set_num_null_samples(1000) \n", | |
"null_samples = mmd.sample_null() # seems to take quite long, and not running on all my cpus (only 2/4 it seems). I havent checked properly, but this is only first impression\n", | |
"print \"time taken:\", time.time()-start\n", | |
"plt.hist(null_samples, bins=20);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(1000,) (1000,)\n", | |
"time taken: 0.198345899582\n", | |
"time taken: 32.5579361916\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAEACAYAAABMEua6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAD3BJREFUeJzt3X+MZWddx/H3py3tFioVqt1VSotFFyORAIGCoDCh1taq\nlD/8AVVLi0FCFAgaQovozh/+wY8YRAwx6LJphY1BBFoVQtvUiykE+rtboJQalS3QDiKLpNpdWvfr\nH3OnToaZvfeec2Zm59n3K7mZe859zjnfnJx85pln7nNOqgpJUhuO2+wCJEnDMdQlqSGGuiQ1xFCX\npIYY6pLUEENdkhoyMdST7E6ykGTfsnXPTXJTktvHP5+zvmVKkqYxTU99D3D+inXvAN5aVc8CdgHv\nHLowSdLsJoZ6Vd0IHFix+n7g1PH77we+NnBdkqQOMs2M0iRnAX9fVc8YL58JfBooIMALquq+9SxU\nkjRZ13+U7gZeV1VnAm8E3j9cSZKkrrr21L9TVY9f9vl/VdWpa2zrzWUkqYOqyqzbnDBlu4xfS+5N\n8uKq+lSSc4EvD12YVpdkvqrmN7uOFnguh+X5HFbXDvHEUE+yF5gDTkuyn8Vvu/w28N4kJwIHx8uS\npE02MdSr6uI1PnrewLVIknpyRunWM9rsAhoy2uwCGjPa7AI05T9Kex0gKcfUJWk2XbPTnrokNcRQ\nl6SGGOqS1BBDXZIaYqhLUkOmnVG6YZI8GfitHru4par+Yah6JGkrOepCHfgNeMEfwXkdvgb5TeAD\nXweeNHRRkrQVHI2hDvz0YZg/fvbtvgR8YPBqJGmrcExdkhpiqEtSQwx1SWqIoS5JDTHUJakhhrok\nNWRiqCfZnWQhyb4V61+X5O4kdyV52/qVKEma1jTfU98DvAe4amlFkjngl4CfrKpHkvzA+pQnSZrF\nxJ56Vd0IHFix+rXA26rqkXGbb65DbZKkGXUdU98JvCjJZ5P8U5LnDFmUJKmbrrcJOAF4QlU9P8lz\ngQ8BZ6/VOMn8ssVRVY06HleSmjQe1p7ru5+uoX4f8BGAqro5yeEkp1XVf67WuKrmOx5Hko4J487u\naGk5ya4u+5l2+CXj15KPAS8ZH3gn8Ji1Al2StHEm9tST7GXxT4LTkuwHdgHvB/YkuQs4BFyynkVK\nkqYzMdSr6uI1PvrNgWuRJPXkjFJJaoihLkkNMdQlqSGGuiQ1xFCXpIYY6pLUEENdkhpiqEtSQwx1\nSWqIoS5JDTHUJakhhrokNcRQl6SGGOqS1BBDXZIaYqhLUkMmhnqS3UkWkuxb5bPfHz+f9InrU54k\naRbT9NT3AOevXJnkDOA84CtDFyVJ6mZiqFfVjcCBVT56F/CmwSuSJHXWaUw9yUuB+6rqroHrkST1\nMPHB0yslORl4C4tDL4+unrDN/LLFUVWNZj2uJLUsyRww13c/M4c68FTgKcCdSQKcAdya5Jyq+sZq\nG1TVfOcKJekYMO7sjpaWk+zqsp9pQz3jF1X1eWDHsgP/G/Dsqlpt3F2StIGm+UrjXuAzwM4k+5Nc\ntqJJMWH4RZK0MSb21Kvq4gmfnz1cOZKkPpxRKkkNMdQlqSGGuiQ1xFCXpIYY6pLUEENdkhpiqEtS\nQwx1SWqIoS5JDTHUJakhhrokNcRQl6SGGOqS1BBDXZIaYqhLUkMMdUlqyDRPPtqdZCHJvmXr3pHk\n7iR3JPm7JI9f3zIlSdOYpqe+Bzh/xbprgadX1TOBe4Erhi5MkjS7iaFeVTcCB1asu76qDo8XPwuc\nsQ61SZJmNMSY+quATwywH0lSTxMfPH0kSf4AeLiq9k5oN79scVRVoz7HPbLv7khS3bfftlD10I7h\n6pGkyZLMAXN999M51JNcClwIvGRS26qa73qc2T10HPTIdLJ9sFIkaUrjzu5oaTnJri77mTbUM34t\nHewC4E3Ai6rqUJcDS5KGN81XGvcCnwF2Jtmf5DLgPcApwHVJbkvy3nWuU5I0hYk99aq6eJXVe9ah\nFklST84olaSGGOqS1BBDXZIaYqhLUkMMdUlqiKEuSQ0x1CWpIb3u/dKmk+h+75hth+Fgx1+U3nNG\nUn+G+vc4RPd7x6THfWe854yk/hx+kaSGGOqS1BBDXZIaYqhLUkMMdUlqiKEuSQ2Z5iEZu5MsJNm3\nbN0Tklyb5J4kn0xy6vqWKUmaxjQ99T3A+SvWXQ5cX1VPA24Arhi6MEnS7CaGelXdCBxYsfoi4Mrx\n+yuBlw1clySpg65j6qdX1QJAVT0AnD5cSZKkrob6R2nXufGSpAF1vffLQpLtVbWQZAfwjSM1TjK/\nbHFUVaOOx5WkJiWZA+b67mfaUM/4teQa4FLg7cArgauPtHFVzXeoTZKOGePO7mhpOcmuLvuZ5iuN\ne4HPADuT7E9yGfA24Lwk9wDnjpclSZtsYk+9qi5e46OfHbgWSVJPziiVpIYY6pLUEENdkhpiqEtS\nQwx1SWqIoS5JDTHUJakhhrokNcRQl6SGGOqS1BBDXZIaYqhLUkMMdUlqiKEuSQ0x1CWpIYa6JDWk\nV6gnuSLJF5LsS/LBJCcOVZgkaXadQz3JWcCrgWdV1TNYfIrSy4cqTJI0u2kfPL2a7wDfBR6X5DDw\nWODrg1QlSeqkc0+9qg4AfwLsB74GfLuqrh+qMEnS7PoMv5wNvBE4C/hh4JQkaz2kWpK0AfoMvzwH\n+HRVfQsgyUeAFwB7VzZMMr9scVRVox7H1SqSkx+Ag9u7bb3tMBzs+Au+z7YA2xaqHtrRfXupDUnm\ngLm+++kT6vcAf5hkG3AIOBe4ebWGVTXf4ziaysHtUB23zXGbsy1AOv4iktoy7uyOlpaT7Oqynz5j\n6ncCVwG3AncCAd7XdX+SpP769NSpqncC7xyoFklST84olaSGGOqS1BBDXZIaYqhLUkMMdUlqiKEu\nSQ0x1CWpIYa6JDWk1+QjDekkkvSZby9JhvrR4xA976EyVCGStjCHXySpIYa6JDXEUJekhhjqktQQ\nQ12SGmKoS1JDeoV6klOT/G2Su5N8IcnzhipMkjS7vt9Tfzfw8ar6lSQnAI8doCZJUkedQz3J44Gf\nqapLAarqEeA7A9UlSeqgz/DLjwDfTLInyW1J3pfk5KEKkyTNrs/wywnAs4HfqapbkvwpcDmwa2XD\nJPPLFkdVNepxXElqTpI5YK7vfvqE+leB+6rqlvHyh4E3r9awquZ7HEeSmjfu7I6WlpN8Twd5Gp2H\nX6pqAbgvyc7xqnOBL3bdnySpv77ffnk98MEkjwH+Fbisf0mSpK56hXpV3Qk8d6BaJEk9OaNUkhpi\nqEtSQwx1SWqIoS5JDTHUJakhhrokNcRQl6SGGOqS1BBDXZIaYqhLUkMMdUlqiKEuSQ0x1CWpIYa6\nJDXEUJekhhjqktSQ3qGe5LgktyW5ZoiCJEndDdFTfwM+m1SSjgq9Qj3JGcCFwF8NU44kqY++PfV3\nAW8CaoBaJEk9dX7wdJJfABaq6o4kc0CO0HZ+2eKoqkZdj6vWnESSjp2CbYfhYMeOybaFqod2dNky\nOfkBOLi923H7HVvtGufoXN/9dA514IXAS5NcCJwMfF+Sq6rqkpUNq2q+x3HUtEN0/0Mvx/XYtkco\nH9ze74/TPsdWq8ad3dHScpJdXfbTefilqt5SVWdW1dnAy4EbVgt0SdLG8XvqktSQPsMvj6qqTwGf\nGmJfkqTu7KlLUkMMdUlqiKEuSQ0x1CWpIYa6JDXEUJekhhjqktQQQ12SGmKoS1JDDHVJaoihLkkN\nMdQlqSGGuiQ1xFCXpIYY6pLUEENdkhrSOdSTnJHkhiRfSHJXktcPWZgkaXZ9nnz0CPB7VXVHklOA\nW5NcW1VfGqg2SdKM+jx4+oGqumP8/kHgbuBJQxUmSZrdIGPqSZ4CPBP43BD7kyR10/vB0+Ohlw8D\nbxj32FdrM79scVRVo77HlY5FyckPwMHt3bbetlD10I5hK2rbRp7vJHPAXLdj/b9eoZ7kBBYD/a+r\n6uq12lXVfJ/jSFpycDtUx23TMZyOZRt3vsed3dGjWye7uhy17/DL+4EvVtW7e+5HkjSAPl9pfCHw\n68BLktye5LYkFwxXmiRpVp2HX6rq08DxA9YiSerJGaWS1BBDXZIaYqhLUkMMdUlqiKEuSQ0x1CWp\nIYa6JDWk971fJM3qJJJ0nXt+zOl3/xU41u55Y6hLG+4QPe4nMmQhW0Sf+6/AsXbPG4dfJKkhhrok\nNcRQl6SGGOqS1BBDXZIaYqhLUkN6hXqSC5J8KcmXk7x5qKIkSd30efLRccCfA+cDTwdekeTHhypM\naxltdgHNGD/oVwPxfB4d+vTUzwHuraqvVNXDwN8AFw1TltY22uwCWjK32QU0Zm6zC1C/UH8ScN+y\n5a+O10mSNsnReJuAh2Hvw3Dzg7Nv+uDxwCmDVyRJW0Squt1TIcnzgfmqumC8fDlQVfX2Fe28cZEk\ndVBVM9/sp0+oHw/cA5wL3A/cBLyiqu7utENJUm+dh1+q6n+T/C5wLYtj87sNdEnaXJ176pKko8/g\nM0qTPCHJtUnuSfLJJKeu0e7fk9yZ5PYkNw1dx1Y2zaSuJH+W5N4kdyR55kbXuJVMOp9JXpzk20lu\nG7/euhl1bgVJdidZSLLvCG28Nqc06Xx2uTbX4zYBlwPXV9XTgBuAK9ZodxiYq6pnVdU561DHljTN\npK4kPw88tap+DHgN8BcbXugWMcMkuX+uqmePX3+8oUVuLXtYPJer8tqc2RHP59hM1+Z6hPpFwJXj\n91cCL1ujXdbp+FvdNJO6LgKuAqiqzwGnJsfW011mMO0kuWPxkUIzq6obgQNHaOK1OYMpzifMeG2u\nR6ieXlULAFX1AHD6Gu0KuC7JzUlevQ51bFXTTOpa2eZrq7TRomknyf3UeLjgH5P8xMaU1iSvzeHN\ndG12+vZLkuuA5b99w2JIrzbes9Z/Yl9YVfcn+UEWw/3u8W8taaPdCpxZVf8zHj74GLBzk2uSoMO1\n2SnUq+q8tT4bD/pvr6qFJDuAb6yxj/vHP/8jyUdZ/DPZUF/s2Zy5bPmM8bqVbZ48oY0WTTyfVfXg\nsvefSPLeJE+sqm9tUI0t8docUJdrcz2GX64BLh2/fyVw9coGSR6b5JTx+8cBPwd8fh1q2YpuBn40\nyVlJTgRezuI5Xe4a4BJ4dGbvt5eGvPQ9Jp7P5WO+Sc5h8au+BvrawtrjvF6bs1vzfHa5Ntfj3i9v\nBz6U5FXAV4BfHRf0Q8BfVtUvsjh089HxLQROAD5YVdeuQy1bzlqTupK8ZvHjel9VfTzJhUn+Bfhv\n4LLNrPloNs35BH45yWuBh4GHgF/bvIqPbkn2sng3xtOS7Ad2ASfitdnJpPNJh2vTyUeS1BC/UihJ\nDTHUJakhhrokNcRQl6SGGOqS1BBDXZIaYqhLUkMMdUlqyP8BD6gfMnPGCbMAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x7f6b28524810>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# same as above but with more data\n", | |
"\n", | |
"X,Y = sample_gaussian_vs_laplace(n=1000)\n", | |
"print X.shape, Y.shape\n", | |
"\n", | |
"feats_p = RealFeatures(X.reshape(1,len(X)))\n", | |
"feats_q = RealFeatures(Y.reshape(1,len(Y)))\n", | |
"width=1\n", | |
"k = GaussianKernel(10, width)\n", | |
"\n", | |
"joint_features = RealFeatures(np.hstack((X, Y)).reshape(1, len(X)+len(Y)))\n", | |
"k.init(joint_features,joint_features)\n", | |
"start = time.time()\n", | |
"k.get_kernel_matrix()\n", | |
"print \"time taken:\", time.time()-start\n", | |
"\n", | |
"mmd = QuadraticTimeMMD()\n", | |
"mmd.set_p(feats_p)\n", | |
"mmd.set_q(feats_q)\n", | |
"mmd.set_kernel(k)\n", | |
"\n", | |
"start = time.time()\n", | |
"mmd.set_num_null_samples(100) # Is the kernel matrix precomputed in here? It should be. Why isnt this faster?\n", | |
"# when doing permutation test, is the kernel matrix only computed once and then summed over differently? Or is it copied around?\n", | |
"# what takes the time here?\n", | |
"null_samples = mmd.sample_null()\n", | |
"print \"time taken:\", time.time()-start\n", | |
"plt.hist(null_samples, bins=20);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(5000,) (5000,)\n", | |
"time taken: 11.0228209496\n", | |
"time taken: 164.471257925\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAEACAYAAABWLgY0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADmlJREFUeJzt3X2sZHddx/H3Z7uWp8ryJFvTpUWeAwErmFJSkq4hSFsT\n+g8JD0a0idoQGohELSEkbWJi5B8fEEytqZhiCMSKsFrQYvBKqqEU2m0r7EILCKXQ9QEaQqFY4Osf\nc0ont/funHvvPNz99v1KJnvOzG/OfPbcnc+c+5s5s6kqJEk97Vl1AEnS4ljyktSYJS9JjVnyktSY\nJS9JjVnyktTYzJJPciDJx5N8NsltSd60ybh3Jrk9yeEkZ84/qiRpq/aOGPMD4C1VdTjJKcBnklxX\nVUcfGJDkfODpVfXMJC8GrgDOXkxkSdJYM4/kq+ruqjo8LH8HOAKctm7YhcDVw5gbgH1J9s85qyRp\ni7Y0J5/kqcCZwA3rbjoNuHNq/S4e+kIgSVqy0SU/TNVcA7x5OKKXJO1yY+bkSbKXScG/t6o+vMGQ\nu4CnTK0fGK5bvx2/KEeStqGqst07zrwwmW//w+PcfgFw7bB8NvDJTcbVmMdb9QW4fNUZzPnwywkU\n1IzLZZtcTy3uMTe7bP6Yq96Xu+VnPq/9u5PunHkkn+Qc4JeB25LcPAnN24Azhge+sqo+kuSCJHcA\n9wIXbesVR5I0VzNLvqr+DThpxLhL5pJIkjQ3nvG6sbVVBxhpbdUBRlpbdYCR1lYdYLaDqw4w1tqq\nA4y0tuoAi5Zhvmc5D5ZUbffNA6m5yQcTtvt8DNt5bq3iMR9O5rV/d9KdHslLUmOWvCQ1ZslLUmOW\nvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1\nZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslL\nUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOW\nvCQ1ZslLUmOWvCQ1ZslLUmMzSz7JVUmOJbl1k9vPTXJPkpuGy9vnH1OStB17R4x5D/CnwNXHGfOJ\nqnrlfCJJkuZl5pF8VV0PfGvGsMwnjiRpnuY1J/+SJIeTXJvkuXPapiRph8ZM18zyGeD0qvpukvOB\nDwHP2mxwksunVteqam0OGSSpkTXgIX25Lamq2YOSM4C/r6oXjBj7ZeBFVfXNDW6rqnJqR9pAkoLZ\nz8dN7s12nlureMyHk3nt351059jpmrDJvHuS/VPLZzF54XhIwUuSlm/mdE2S9wEHgScm+SpwGXAy\nUFV1JfCqJG8A7ge+B7x6cXElSVsxarpmbg/mdI20Kadr+jmRpmskSScgS16SGrPkJakxS16SGrPk\nJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakx\nS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16S\nGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPkJakxS16SGrPk\nJakxS16SGrPkJakxS16SGrPkJamxmSWf5Kokx5Lcepwx70xye5LDSc6cb0RJ0naNOZJ/D/CKzW5M\ncj7w9Kp6JnAxcMWcskmSdmhmyVfV9cC3jjPkQuDqYewNwL4k++cTT5K0E3vnsI3TgDun1u8arjs2\nh20vTZKnAc/b5t2PVNUd88wjSfMwj5LfkiSXT62uVdXasjNs7HEfhKc9A/b9YGv3+/ZJcMfXgWcv\nJNYCJI+6G+7b5m9bj/wR3LeNN+wfeazqe6du7zFPLDvbvxLAGvCQvtyWeZT8XcBTptYPDNdtqKou\nn8NjLsCeR8C7HwNnb/F+twAHT15EosW5bz/UNu+bPdu778NpCm+7+zdzT6IT1UHgwb5Mctl2tzT2\niCxs/i/wEPD6IcjZwD1VdUJN1UhSVzOP5JO8j8nLyhOTfBW4DDgZqKq6sqo+kuSCJHcA9wIXLTKw\nJGm8mSVfVa8bMeaS+cSRJM2TZ7xKUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslL\nUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOW\nvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1\nZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslLUmOWvCQ1ZslL\nUmOjSj7JeUmOJvlCkks3uP3cJPckuWm4vH3+USVJW7V31oAke4B3AS8Dvg7cmOTDVXV03dBPVNUr\nF5BRkrRNY47kzwJur6qvVNX9wPuBCzcYl7kmkyTt2JiSPw24c2r9a8N1670kyeEk1yZ57lzSSZJ2\nZOZ0zUifAU6vqu8mOR/4EPCsjQYmuXxqda2q1uaUQZKaWAMe0pfbMqbk7wJOn1o/MFz3Y1X1nanl\njyb5syRPqKpvrt9YVV2+zayS9DBxEHiwL5Nctt0tjZmuuRF4RpIzkpwMvAY4ND0gyf6p5bOAbFTw\nkqTlmnkkX1U/THIJcB2TF4WrqupIkosnN9eVwKuSvAG4H/ge8OpFhpYkjTNqTr6q/hF49rrr/nxq\n+d3Au+cbTZK0U57xKkmNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1Jgl\nL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mN\nWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS\n1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1JglL0mNWfKS1Niokk9yXpKjSb6Q\n5NJNxrwzye1JDic5c74xJUnbMbPkk+wB3gW8Ange8Nokz1k35nzg6VX1TOBi4IoFZF2itVUHGCXJ\nwVVnGGdt1QFGOTH259qqA4xyYuzLEyfnTow5kj8LuL2qvlJV9wPvBy5cN+ZC4GqAqroB2Jdk/1yT\nLtXaqgOMdXDVAcZZW3WAsQ6uOsBsa6sOMNbBVQcY6eCqAyzamJI/Dbhzav1rw3XHG3PXBmMkSUu2\nd9UBdo8ffR/eeC/s+wF86ZHwifvG3e/bJ0H932KzSdL2pKqOPyA5G7i8qs4b1t8KVFW9Y2rMFcC/\nVNUHhvWjwLlVdWzdto7/YJKkDVVVtnO/MUfyNwLPSHIG8A3gNcBr1405BLwR+MDwonDP+oLfSUhJ\n0vbMLPmq+mGSS4DrmMzhX1VVR5JcPLm5rqyqjyS5IMkdwL3ARYuNLUkaY+Z0jSTpxLXQM16TPD7J\ndUk+n+SfkuzbZNy+JH+T5EiSzyZ58SJz7SDnfya5JcnNST61zIxbyTmM3ZPkpiSHlplxeOyZOZM8\nIskNw778bJLf36U5DyT5+JDxtiRv2m0Zh3FXJTmW5NYl5zshTpSclTPJs5P8e5L7krxlFRmHHLNy\nvm7ooFuSXJ/k+TM3WlULuwDvAH53WL4U+INNxv0VcNGwvBd47CJz7SDnl4DHLzPbdnIOt/8W8NfA\nod2aE3j08OdJwCeBc3ZbTuBU4Mxh+RTg88BzdlPG4baXAmcCty4x2x7gDuAM4CeAw+v3DXA+cO2w\n/GLgk8v8GW8h55OAFwG/B7xl2Rm3kPNsYN+wfN6Y/bno0EeB/cPyqcDRDcY8FvjiKnbqVnIOt30Z\neOIJkPMA8DEmJ3qsouRH5Zwa/2jgU8Bzd3POYdyHgJftxoxDOSyz5M8GPjq1/lbg0nVjrgBePbV+\n5IG/z27KOXXbZSss+dE5h9sfB9w5a7uL/oKyJ9fwKZuquht48gZjfgb4nyTvGaYXrkzyqAXnWm9M\nToACPpbkxiS/sbR0Dxqb84+A32GSdxVG5RymlG4G7gbWqupzS8wI4/cnAEmeyuRo+YaFJ3vQljIu\n2YlyouSYnLvBVnP+OvDRWRvd8clQST4GTH+FQZiUy9s3GL5R6ewFXgi8sao+neSPmbyCXbbTbHPO\nCZPphG8k+SkmZX+kqq7fTTmT/BJwrKoOD9/LsZCPrc5jf1bVj4CfS/JY4Lok51bVv+62nMN2TgGu\nAd5cVd/ZjRn18JHkF5h8ivGls8buuOSr6uXHCXIsyf6qOpbkVOC/Nhj2NSa/cnx6WL+GydzjXM0h\nJ1X1jeHP/07yd0y+12euJT+HnOcAr0xyAfAo4CeTXF1Vr99lOae39e0k1wI/D8y15OeRM8leJv8u\n31tVH55nvnllXJG7gNOn1g8M160f85QZYxZtTM7dYFTOJC8ArgTOq6pvzdrooqdrDgG/Niz/KvCQ\nJ8jwq+idSZ41XPUyYNm/ts/MmeTRw9EcSR4D/CLwH8sKOBizP99WVadX1dOYnLj28XkX/Ahj9ueT\nHvikyDA993ImbzQt08ycg78EPldVf7KMUOuMzQiT3wCWecLhj0+UTHIyk39v6z/NdQh4Pfz47PkN\nT5RcsDE5p63qpM2ZOZOcDvwt8CtV9cVRW13wGwlPAP6ZyScSrgMeN1z/08A/TI372eEveBj4IMO7\nx0t8w2NmTibvHRwGbgZuA966zIxb2Z9T489lNW+8jtmfzwduGvbnLcBv79Kc5wA/nPrZ38TkCGrX\nZBzW3wd8Hfg+8FWGT6stId95Q7bbH3hOMPm68d+cGvMuJp8auQV44bJ/zmNyMpkuuxO4B/jmsA9P\n2YU5/wL436nnzqdmbdOToSSpMf/7P0lqzJKXpMYseUlqzJKXpMYseUlqzJKXpMYseUlqzJKXpMb+\nH8nwSggAS41fAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x7f6b2839a110>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# same as above, how far can be go in terms of data?\n", | |
"\n", | |
"X,Y = sample_gaussian_vs_laplace(n=5000)\n", | |
"print X.shape, Y.shape\n", | |
"\n", | |
"feats_p = RealFeatures(X.reshape(1,len(X)))\n", | |
"feats_q = RealFeatures(X.reshape(1,len(Y)))\n", | |
"width=1\n", | |
"k = GaussianKernel(10, width)\n", | |
"\n", | |
"joint_features = RealFeatures(np.hstack((X, Y)).reshape(1, len(X)+len(Y)))\n", | |
"k.init(joint_features,joint_features)\n", | |
"start = time.time()\n", | |
"k.get_kernel_matrix()\n", | |
"print \"time taken:\", time.time()-start\n", | |
"\n", | |
"mmd = QuadraticTimeMMD()\n", | |
"mmd.set_p(feats_p)\n", | |
"mmd.set_q(feats_q)\n", | |
"mmd.set_kernel(k)\n", | |
"\n", | |
"start = time.time()\n", | |
"mmd.set_num_null_samples(10)\n", | |
"null_samples = mmd.sample_null()\n", | |
"print \"time taken:\", time.time()-start\n", | |
"plt.hist(null_samples, bins=20);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(100,)\n", | |
"0 9.79900360107e-05\n", | |
"1 5.28958201408\n", | |
"2 4.12843990326\n", | |
"3 5.09187102318\n", | |
"4 4.14835810661\n", | |
"5 5.15286803246\n", | |
"6 3.57714080811\n", | |
"7 3.16793704033\n", | |
"8 2.932336092\n", | |
"9 3.41076993942\n", | |
"10 2.9093170166\n", | |
"11 3.13496112823\n", | |
"12 3.31558704376\n", | |
"13 2.8714017868\n", | |
"14 2.85468292236\n", | |
"15 2.90135788918\n", | |
"16 2.92182588577\n", | |
"17 3.0945250988\n", | |
"18 3.04426503181\n", | |
"19 2.88243985176\n" | |
] | |
} | |
], | |
"source": [ | |
"# are p-values uniformly distributed if H0 is true?\n", | |
"# Initial try, too slow. Made smaller below, but this case also is interesting\n", | |
"\n", | |
"X,_ = sample_gaussian_vs_laplace(n=100)\n", | |
"X2,_ = sample_gaussian_vs_laplace(n=100)\n", | |
"\n", | |
"print X.shape\n", | |
"\n", | |
"feats_p = RealFeatures(X.reshape(1,len(X)))\n", | |
"feats_q = RealFeatures(X2.reshape(1,len(X2)))\n", | |
"width=1\n", | |
"k = GaussianKernel(10, width)\n", | |
"\n", | |
"mmd = QuadraticTimeMMD()\n", | |
"mmd.set_p(feats_p)\n", | |
"mmd.set_q(feats_q)\n", | |
"mmd.set_kernel(k)\n", | |
"\n", | |
"\n", | |
"mmd.set_num_null_samples(500) # if this is not called, there is no default and p-values are all nan\n", | |
"\n", | |
"#mmd.set_null_approximation_method()\n", | |
"\n", | |
"num_runs = 20\n", | |
"p_values = np.zeros(num_runs)\n", | |
"\n", | |
"last = time.time()\n", | |
"for i in range(num_runs):\n", | |
" print i, time.time()-last\n", | |
" last = time.time()\n", | |
" \n", | |
" # this is a usual use-case: repeating the same experiment multiple times.\n", | |
" # I guess we can save computation here, e.g. the precomputed kernel matrix\n", | |
" # .... maybe use some kind of lazy evaluation?\n", | |
" # also, notice the inhomogeneous times. Is that just by chance? Sometimes there are outliers up to 9s.\n", | |
" stat = mmd.compute_statistic()\n", | |
" p_values[i] = mmd.compute_p_value(stat)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXUAAAEACAYAAABMEua6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADeBJREFUeJzt3V2sZfVdxvHvU8aiLXVCR5mJvIqmxZDipLFYQy+GaoR6\nA+GiWky1JhoSpRK9sNDEwI1pekOsMZjQUoKJpFEaLVAptMKkwZQWbXmnWKpQwc7UFmiKEaXl58XZ\nA4fxzOx99t5nrzW/+X6SlayzX2Y9WWftZ9b5n/U/K1WFJKmH1wwdQJK0PJa6JDViqUtSI5a6JDVi\nqUtSI5a6JDUytdSTnJTkziQPJ3kwyfsnj1+Z5KkkX54s5299XEnS4WTadepJdgG7quq+JMcB/wxc\nAPwq8L2qunrrY0qSZrFt2guqah+wb7L+fJJHgRMnT2cLs0mSNmlTY+pJTgN2A1+cPHRpkvuSfCzJ\n9iVnkyRt0sylPhl6uQm4rKqeB64BTq+q3aydyTsMI0kDmzqmDpBkG3ArcFtVfWSD508FbqmqszZ4\nzj8uI0lzqKpND3FPHVOf+DjwyPpCT7JrMt4OcBHw0DKDdZTkqqq6augcY+C+eIX74hXui1fMe0I8\ntdSTnAP8OvBgkq8ABXwQuDjJbuAl4AngknkCSJKWZ5arX/4ROGaDpz6z/DiSpEU4o3S19g4dYET2\nDh1gRPYOHWBE9g4d4Eg30y9KF9pAUo6pS9LmzNudnqlLUiOWuiQ1YqlLUiOzXqcuSVsu+ZF98MLO\nYVP88P6q/941bIb5+YtSSaOxNuFm6EnoGcWESX9RKkmy1CWpE0tdkhqx1CWpEUtdkhqx1CWpkaPi\nOvVxXPsKR/r1r9LR4dgj+uY+R8V16uO49hXGcv2rNFbj+KyG4TPAvH3h8IskNWKpS1IjlrokNWKp\nS1IjlrokNWKpS1IjK7lOPckpq9iOJB3tVjT5aMcjq9nORl4KPDvc5qUpxjE5zolxXaxk8tGwF/I/\nA+zgSJ5MoN6G/4zAWI7NseyL4TOAk48kSZa6JHViqUtSI5a6JDViqUtSI5a6JDViqUtSI0fFnY+k\njYxj0o+0XJa6jmIv7BzLJBNpWRx+kaRGLHVJasRSl6RGppZ6kpOS3Jnk4SQPJvn9yePHJ7kjyWNJ\nbk+yfevjSpIOZ5Yz9e8Df1hVZwK/APxekjOAy4HPVdWbgTuBK7YupiRpFlNLvar2VdV9k/XngUeB\nk4ALgBsmL7sBuHCrQkqSZrOpMfUkpwG7gXuAnVW1H9aKHzhh2eEkSZsz83XqSY4DbgIuq6rn1/6Y\n/asc5oLfq9at75ksOpo58Uc62N7JspiZ7nyUZBtwK3BbVX1k8tijwJ6q2p9kF3BXVf3MBu/1zkcv\nG8fdZcZg+OMCxnSHm+FzjOPY9LhYb2vvfPRx4JEDhT5xM/C+yfpvAp/a7MYlScs19Uw9yTnA54EH\nWfvvq4APAl8C/ho4GXgSeHdVPbfB+z1Tf9k4zobGYPjjAsZ0RjZ8jnEcmx4X6833PfHG0ys1jg/O\nGAx/XMCYPrzD5xjHselxsZ43npako56lLkmNWOqS1IilLkmNWOqS1IilLkmNWOqS1IilLkmNWOqS\n1IilLkmNWOqS1IilLkmNWOqS1MjMdz5SD95xSOrNUj/qvLBzLH9WVNLyOfwiSY1Y6pLUiKUuSY1Y\n6pLUiKUuSY1Y6pLUiKUuSY1Y6pLUiKUuSY1Y6pLUiKUuSY1Y6pLUiKUuSY1Y6pLUiKUuSY1Y6pLU\niKUuSY1Y6pLUiKUuSY1Y6pLUyNRST3Jdkv1JHlj32JVJnkry5cly/tbGlCTNYpYz9euB8zZ4/Oqq\neutk+cySc0mS5jC11KvqbuDZDZ7K8uNIkhaxyJj6pUnuS/KxJNuXlkiSNLd5S/0a4PSq2g3sA65e\nXiRJ0ry2zfOmqvrPdV9+FLjl8O+4at36nskiaTyOJUkNneLotneyLCZV07+PSU4Dbqmqt0y+3lVV\n+ybrfwC8raouPsR7C4Y8Vp4BdjBshgNCVQ36u4jhvx8HhOFzjCEDjCPHGDLAOHKMIQPM2xdTz9ST\n3MjaqfWOJN8ArgTOTbIbeAl4ArhksxuWJC3fTGfqC21g8DNDz9RflWDw78cBYzgbGkMGGEeOMWSA\nceQYQwaYty+cUSpJjVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5J\njVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5JjVjqktSIpS5JjWwb\nOsDR5ViS1NApJPVlqa/U/wBDd3oG3r6kreTwiyQ1YqlLUiOWuiQ1YqlLUiOWuiQ1YqlLUiOWuiQ1\nYqlLUiOWuiQ1YqlLUiOWuiQ1MrXUk1yXZH+SB9Y9dnySO5I8luT2JNu3NqYkaRaznKlfD5x30GOX\nA5+rqjcDdwJXLDuYJGnzppZ6Vd0NPHvQwxcAN0zWbwAuXHIuSdIc5h1TP6Gq9gNU1T7ghOVFkiTN\na1m/KB36j4RLkpj/Jhn7k+ysqv1JdgHfOvzLr1q3vmeySJJesXeyLCZV00+yk5wG3FJVb5l8/WHg\nmar6cJIPAMdX1eWHeG8NeyL/DLCDcfwwEYbPMYYMMI4cY8gA48gxhgwwjhxjyAAQqmrTtyqbWupJ\nbmTt1HoHsB+4Evg74G+Ak4EngXdX1XOHeL+l/rIxHCxjyADjyDGGDDCOHGPIAOPIMYYMsGWlvihL\nfb0xHCxjyADjyDGGDDCOHGPIAOPIMYYMMG+pO6NUkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtd\nkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx\n1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWpEUtdkhqx1CWp\nEUtdkhqx1CWpEUtdkhqx1CWpkW2LvDnJE8B3gZeAF6vq7GWEkiTNZ6FSZ63M91TVs8sII0lazKLD\nL1nCvyFJWpJFC7mAzya5N8nvLCOQJGl+iw6/nFNV30zy46yV+6NVdff/f9lV69b3TBZJ0iv2TpbF\npKoW/kcAklwJfK+qrj7o8Vo7oR/KM8AOhs1wQBg+xxgywDhyjCEDjCPHGDLAOHKMIQNAqKps9l1z\nD78keV2S4ybrrwd+GXho3n9PkrS4RYZfdgJ/u3Ymzjbgr6rqjuXEkiTNY2nDL4fcgMMv64zhx7ox\nZIBx5BhDBhhHjjFkgHHkGEMGWPnwiyRpfCx1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrE\nUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpek\nRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1SWrEUpekRix1\nSWrEUpekRhYq9STnJ/lqkn9J8oFlhZIkzWfuUk/yGuDPgfOAM4H3JDljWcEkSZu3yJn62cDXqurJ\nqnoR+ARwwXJiSZLmsUipnwj8+7qvn5o8JkkayLbVbOad313NdjbyvwF+dLjtS9LqLFLqTwOnrPv6\npMljG7hr+wLbWZIMHWBiDDnGkAHGkWMMGWAcOcaQAcaRYwwZ5pOqmu+NyTHAY8AvAt8EvgS8p6oe\nXV48SdJmzH2mXlU/SHIpcAdrY/PXWeiSNKy5z9QlSeOztBmls0xESvJnSb6W5L4ku5e17bGZti+S\nXJzk/slyd5K3DJFzq806OS3J25K8mOSiVeZbpRk/H3uSfCXJQ0nuWnXGVZnh87EjyW2TnngwyfsG\niLkSSa5Lsj/JA4d5zeZ6s6oWXlj7z+Fx4FTgh4D7gDMOes27gE9P1n8euGcZ2x7bMuO+eDuwfbJ+\nfsd9Mct+WPe6fwBuBS4aOveAx8R24GHgxMnXPzZ07gH3xZXAhw7sB+A7wLahs2/R/ngHsBt44BDP\nb7o3l3WmPstEpAuAvwSoqi8C25PsXNL2x2Tqvqiqe6rqwGWe99Dz+v5ZJ6e9H7gJ+NYqw63YLPvi\nYuCTVfU0QFV9e8UZV2WWfbEPeMNk/Q3Ad6rq+yvMuDJVdTfw7GFesuneXFapzzIR6eDXPL3BazrY\n7KSs3wZu29JEw5i6H5L8BHBhVf0FR/I1ZNPNcky8CXhjkruS3JvkvStLt1qz7IuPAmcm+Q/gfuCy\nFWUbo0335oomH2kjSc4Ffou1H8GORn8KrB9T7Vzs02wD3gq8E3g98IUkX6iqx4eNNYgrgPur6twk\nPwV8NslZVfX80MGOBMsq9VkmIj0NnDzlNR3MNCkryVnAtcD5VXW4H7+OVLPsh58DPpEkrI2dvivJ\ni1V184oyrsos++Ip4NtV9QLwQpLPAz/L2vhzJ7Psi3OAPwGoqq8n+TfgDOCfVpJwXDbdm8safrkX\n+OkkpyZ5LfBrwMEfzJuB3wBI8nbguarav6Ttj8nUfZHkFOCTwHur6usDZFyFqfuhqk6fLD/J2rj6\n7zYsdJjt8/Ep4B1JjknyOtZ+KdZx3scs++JR4JcAJuPHbwL+daUpVysc+qfUTffmUs7U6xATkZJc\nsvZ0XVtVf5/kV5I8DvwXa8MO7cyyL4A/Bt4IXDM5S32xqs4eLvXyzbgfXvWWlYdckRk/H19Ncjvw\nAPAD4NqqemTA2FtixuPiQ8D1Se5nrez+qKqeGS711klyI7AH2JHkG6xd+fNaFuhNJx9JUiPezk6S\nGrHUJakRS12SGrHUJakRS12SGrHUJakRS12SGrHUJamR/wNLLN+0xeubDQAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x7f6b28524210>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# are p-values uniformly distributed if H0 is true?\n", | |
"# permutation test version. TODO test other null approx methods for that\n", | |
"num_runs = 200\n", | |
"p_values = np.zeros(num_runs)\n", | |
"\n", | |
"last = time.time()\n", | |
"for i in range(num_runs):\n", | |
" X,_ = sample_gaussian_vs_laplace(n=100)\n", | |
" X2,_ = sample_gaussian_vs_laplace(n=100)\n", | |
"\n", | |
" feats_p = RealFeatures(X.reshape(1,len(X)))\n", | |
" feats_q = RealFeatures(X2.reshape(1,len(X2)))\n", | |
" width=1\n", | |
" k = GaussianKernel(10, width)\n", | |
"\n", | |
" mmd = QuadraticTimeMMD()\n", | |
" mmd.set_p(feats_p)\n", | |
" mmd.set_q(feats_q)\n", | |
" mmd.set_kernel(k)\n", | |
"\n", | |
" mmd.set_num_null_samples(50)\n", | |
" stat = mmd.compute_statistic() # would be good if compute_p_value() with no arguments computed the statistic itself\n", | |
" p_values[i] = mmd.compute_p_value(stat)\n", | |
"\n", | |
"# does this look more or less uniform (it has to be)?\n", | |
"plt.hist(p_values);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.055\n" | |
] | |
} | |
], | |
"source": [ | |
"# does the type1 error (wrong alarm) calibration work.\n", | |
"# if so, it should be equal to the alpha on average\n", | |
"# permutation test version. TODO test other null approx methods for that\n", | |
"\n", | |
"num_runs = 200\n", | |
"rejections = np.zeros(num_runs)\n", | |
"\n", | |
"last = time.time()\n", | |
"for i in range(num_runs):\n", | |
" X,_ = sample_gaussian_vs_laplace(n=100)\n", | |
" X2,_ = sample_gaussian_vs_laplace(n=100)\n", | |
"\n", | |
" feats_p = RealFeatures(X.reshape(1,len(X)))\n", | |
" feats_q = RealFeatures(X2.reshape(1,len(X2)))\n", | |
" width=1\n", | |
" k = GaussianKernel(10, width)\n", | |
"\n", | |
" mmd = QuadraticTimeMMD()\n", | |
" mmd.set_p(feats_p)\n", | |
" mmd.set_q(feats_q)\n", | |
" mmd.set_kernel(k)\n", | |
"\n", | |
" mmd.set_num_null_samples(50)\n", | |
" alpha=0.05\n", | |
" rejections[i] = mmd.perform_test(alpha)\n", | |
"\n", | |
"# we expect roughtly 0.05 (false) rejection rate here\n", | |
"print np.mean(rejections)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#Other\n", | |
"\n", | |
" * I just realised that `CHypothesisTest::compute_p_value` uses this `find_position_to_insert`. This method should go away (delete it), and we should just do `np.mean(stat>null_samples)` which automatically gives a p-value\n", | |
" \n", | |
" * The print out of `KernelManager::kernel_at() : getting the kernel 0` should be removed. As well as all the others ... this makes things slow\n", | |
"\n", | |
" \n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment