Skip to content

Instantly share code, notes, and snippets.

@andyreagan
Created October 8, 2013 02:44
Show Gist options
  • Save andyreagan/6878629 to your computer and use it in GitHub Desktop.
Save andyreagan/6878629 to your computer and use it in GitHub Desktop.
Fitting data to a power law, computing statistics and testing against other distributions.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "Philanthropy distribution fitting"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": "Test power laws!\n============\nFirst, import the python package.\nThis should have everything we need."
},
{
"cell_type": "code",
"collapsed": false,
"input": "import powerlaw\npowerlaw",
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": "<module 'powerlaw' from '/Users/andyreagan/anaconda/python.app/Contents/lib/python2.7/site-packages/powerlaw.pyc'>"
}
],
"prompt_number": 2
},
{
"cell_type": "markdown",
"metadata": {},
"source": "Load the data"
},
{
"cell_type": "code",
"collapsed": false,
"input": "fileroot = \"/Users/andyreagan/work/2012/2012-01philanthropy-distributions/data/2012-08-30Nonprofit_Data_for_Peter-bill-gottesman-\"\nnames = [\"sinai\",\"Einstein\",\"UVM\",\"unitedway\",\"echo\",\"flynntheater\"]\nreal_names = [\"Mount Sinai Hospital\",\"Einstein School of Medicine\",\"Univeristy of Vermont\",\"United Way, Chittendon County\",\"ECHO Science Museum\",\"Flynn Theater\"]\n# these are the years for which we have data\nyears = [[2009,2010],[2006,2007,2008,2009,2010],[2010,2000,1990,1980,1974],[2004,2005,2006,2007,2008,2009,2010],[2005,2006,2007,2008,2009],[2006,2007,2008,2009,2010]]\n#the data is messy, these are the columns that correspond to the above years\nreal_data_indices = [range(2),[2*x+2 for x in range(5)],range(5),range(7),range(6),[2*x for x in range(5)]]\nn_inst = len(names)\nall_data = range(n_inst)\nfilenames = [fileroot + name + \".csv\" for name in names]\nfor i in range(n_inst):\n #print filenames[i]\n f = open(filenames[i],'r')\n data_raw = [map(float,line.rstrip().split(',')) for line in f]\n f.close()\n #rearrange the data\n #print len(data_raw)\n data = [[tmp[j] for tmp in data_raw] for j in range(len(data_raw[0]))]\n #print len(data)\n #print len(data[0])\n data = [data[j] for j in real_data_indices[i]]\n #print len(data)\n \n all_data[i] = data\nprint \"complete!\"",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "complete!\n"
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": "def powerfit(all_data,name,years,f):\n #print len(all_data)\n f.write(name)\n for i in range(len(years)):\n #remove zeros\n data = [all_data[i][j] for j in range(len(all_data[i])) if all_data[i][j] > 0]\n \n f.write(' & {!s}'.format(years[i]))\n \n #write basic statistics\n f.write(' & {0:.2f}'.format(mean(data)))\n f.write(' & {0:.2f}'.format(std(data)))\n f.write(' & {0:.0f}'.format(max(data)))\n \n print \"Fitting \"+name+\" for \"+str(years[i])\n #print len(data)\n fit = powerlaw.Fit(data)\n #print fit\n \n f.write(' & {0:.2f} $\\\\pm$ {1:.2f}'.format(fit.power_law.alpha,fit.power_law.sigma))\n f.write(' & {0:.0f}'.format(fit.xmin))\n # could compute n_tail...don't care that much\n f.write(' & {0:.2f}'.format(fit.power_law.D))\n \n #print fit.distribution_compare(\"power_law\", \"exponential\")\n f.write('\\\\\\\\\\n')\n f.write(\"\\\\hline\\n\")\n \ndef powertest(all_data,name,years,f):\n #print len(all_data)\n f.write(name)\n for i in range(len(years)):\n f.write(' & {!s}'.format(years[i]))\n \n #remove zeros\n data = [all_data[i][j] for j in range(len(all_data[i])) if all_data[i][j] > 0]\n print \"Fitting \"+name+\" for \"+str(years[i])\n fit = powerlaw.Fit(data)\n f.write(' & {0:.2f}'.format(fit.power_law.D))\n \n #f.write('{0:} {1:}'.format(map(str,fit.distribution_compare(\"power_law\", \"lognormal\"))))\n for dist in [\"lognormal\",\"exponential\", \"stretched_exponential\", \"truncated_power_law\"]: #\"gamma\",\"power_law\"\n (LR,p) = fit.distribution_compare(\"power_law\", dist)\n if p < 0.05:\n f.write('& {0:8.2f} ~~ $\\\\mathbf{{{1:.2f}}}$'.format(LR,p))\n else:\n f.write('& {0:8.2f} ~~ {1:.2f}'.format(LR,p))\n\n f.write('\\\\\\\\\\n')\n f.write(\"\\\\hline\\n\") \n \nf = open('/Users/andyreagan/work/2012/2012-01philanthropy-distributions/philanthropy-distributions.supplementary.tex','w')\n#f.write(\"\\\\begin{center}\\n\")\nf.write(\"\\\\begin{table}\\n\")\n#f.write(\"\\\\caption{Data Statistics and Power Law MLE Fits}\\n\")\nf.write(\"\\\\begin{tabular}{|lc|rrr|cl|c|}\\n\")\nf.write(\"\\\\hline\\n\")\nf.write(\"{\\\\bf Institution} & {\\\\bf Year} & $~~~~~~~~~~~\\\\mathbf{ \\\\langle x \\\\rangle}$ & $~~~~~~~~~~~~~\\\\mathbf{ \\\\sigma}$ & $~~~~~~~~~\\\\mathbf{x_\\\\text{max}}$ & $~~~~~~~~~\\\\mathbf{\\\\gamma}~~~~~~~~~$ & $\\\\mathbf{x_\\\\text{min} }~~~~~$ & ~~~{\\\\bf D} ~~~\\\\\\\\ \\n\")\n#f.write(\"\\\\hline\\n\")\nf.write(\"\\\\hline\\n\")\n\nfrom numpy import std,mean\nfor i in range(n_inst):\n powerfit(all_data[i],real_names[i],years[i],f)\n \nf.write(\"\\\\end{tabular}\\n\")\nf.write(\"\\\\label{table:fitstats}\\n\")\nf.write(\"\\\\caption{Summary statistics of all of the donation data is presented. The reported $\\\\gamma$ and range are fit with the MLE method, and the $x_\\\\text{min}$ which was found to minimize the Kolmogorov-Smirnoff statistc {\\\\bf D} is reported along with {\\\\bf D} itself. In this case, lower values of {\\\\bf D} indicate a better fit.}\\n\")\nf.write(\"\\\\end{table}\\n\")\n\n\n\nf.write(\"\\\\begin{table}\\n\")\n\nf.write(\"\\\\begin{tabular}{|lc|c|c|c|c|c|}\\n\")\nf.write(\"\\\\hline\\n\")\nf.write(\" & & & {\\\\bf Log-Normal } & {\\\\bf Exponential } & {\\\\bf Stretched Exp. } & {\\\\bf Cutoff Power Law} \\\\\\\\ \\n\")\nf.write(\"{\\\\bf Institution} & {\\\\bf Year} & {\\\\bf ~~D~~} & {\\\\bf ~~LR~~} ~~ {\\\\bf ~~p~~} & {\\\\bf ~~LR~~} ~~ {\\\\bf ~~p~~} & {\\\\bf ~~LR~~} ~~ {\\\\bf ~~p~~} & {\\\\bf ~~LR~~} ~~ {\\\\bf ~~p~~}\\\\\\\\ \\n\")\n#f.write(\"\\\\hline\\n\")\nf.write(\"\\\\hline\\n\")\n\nfrom numpy import std,mean\nfor i in range(n_inst):\n powertest(all_data[i],real_names[i],years[i],f)\n \nf.write(\"\\\\end{tabular}\\n\")\nf.write(\"\\\\label{table:fitdata}\\n\")\nf.write(\"\\\\caption{The results of the Likelihood-Ratio and its associated {\\\\bf p}-value are reported for different distributions. Here, positive values lend support to the Power Law and negative values to the other stated distribution. The significance of the {\\\\bf LR} is {\\\\bf p}, where low values of {\\\\bf p} indicate a trustworthy {\\\\bf LR}. Values for which $\\\\mathbf{ p} < 0.05$ are bolded.}\\n\")\n\nf.write(\"\\\\end{table}\\n\")\n\n#f.write(\"\\\\end{center}\\n\")\nf.close()\n\nfrom os import system\n\nsystem(\"cd /Users/andyreagan/work/2012/2012-01philanthropy-distributions; ./makerevtex4.pl\")\nsystem(\"open /Users/andyreagan/work/2012/2012-01philanthropy-distributions/philanthropy-distributions-revtex4.pdf\")",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "Fitting Mount Sinai Hospital for 2009\nCalculating best minimal value for power law fit\nFitting Mount Sinai Hospital for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Einstein School of Medicine for 2006"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Einstein School of Medicine for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Einstein School of Medicine for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Einstein School of Medicine for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Einstein School of Medicine for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Univeristy of Vermont for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Univeristy of Vermont for 2000"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Univeristy of Vermont for 1990"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Univeristy of Vermont for 1980"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Univeristy of Vermont for 1974"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2004"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2005"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2006"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting United Way, Chittendon County for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting ECHO Science Museum for 2005"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting ECHO Science Museum for 2006\nCalculating best minimal value for power law fit\nFitting ECHO Science Museum for 2007\nCalculating best minimal value for power law fit\nFitting ECHO Science Museum for 2008\nCalculating best minimal value for power law fit\nFitting ECHO Science Museum for 2009\nCalculating best minimal value for power law fit\nFitting Flynn Theater for 2006\nCalculating best minimal value for power law fit\nFitting Flynn Theater for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Flynn Theater for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Flynn Theater for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nFitting Flynn Theater for 2010\nCalculating best minimal value for power law fit\nFitting Mount Sinai Hospital for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Mount Sinai Hospital for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Einstein School of Medicine for 2006"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Einstein School of Medicine for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Einstein School of Medicine for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Einstein School of Medicine for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Einstein School of Medicine for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Univeristy of Vermont for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Univeristy of Vermont for 2000"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Univeristy of Vermont for 1990"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Univeristy of Vermont for 1980"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Univeristy of Vermont for 1974"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2004"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2005"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2006"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting United Way, Chittendon County for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting ECHO Science Museum for 2005"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting ECHO Science Museum for 2006"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting ECHO Science Museum for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting ECHO Science Museum for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting ECHO Science Museum for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Flynn Theater for 2006"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Flynn Theater for 2007"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Flynn Theater for 2008"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Flynn Theater for 2009"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nFitting Flynn Theater for 2010"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\nCalculating best minimal value for power law fit\nAssuming nested distributions"
},
{
"output_type": "stream",
"stream": "stdout",
"text": "\n"
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": "0"
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 158
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment