Created
December 18, 2015 10:08
-
-
Save mickypaganini/39370564903316b152f3 to your computer and use it in GitHub Desktop.
Non-parametric curve fitting using Kernel Density Estimation and peak finding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def non_param_fit(hist, kernel = 'gaussian', bandwidth = 12):\n", | |
" '''\n", | |
" Function:\n", | |
" ---------\n", | |
" used to calculate a non-parametric curve fit to a histogram via Kernel Density Estimation in scikit-learn \n", | |
" (http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html#sklearn.neighbors.KernelDensity)\n", | |
" Args:\n", | |
" -----\n", | |
" hist: a list containing all the x-values usually plotted in a histogram we want to fit\n", | |
" kernel: (optional) the type of kernel to use. Options:'gaussian', 'cosine', 'tophat', 'epanechnikov', 'exponential', 'linear'\n", | |
" bandwidth: (optional) width of the kernel used in the fit. If this number is too small, the fit will appear noisy and with a lot of \n", | |
" bumps due to local behaviors. If this number is too big, the curve will be overly smoothed, and won't capture \n", | |
" much of the underlying data structure.\n", | |
" Returns:\n", | |
" --------\n", | |
" log_dense: density model of the input data 'hist'; need to take the np.exp(log_dense) to actually plot the fit\n", | |
" '''\n", | |
" from sklearn.neighbors import KernelDensity\n", | |
" \n", | |
" kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(hist[:, np.newaxis])\n", | |
" log_dense = kde.score_samples(hist[:, np.newaxis])\n", | |
" return log_dense " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_peak(hist, log_dense):\n", | |
" '''\n", | |
" Function:\n", | |
" ---------\n", | |
" used to find the peak in the fit of a histogram\n", | |
" Args:\n", | |
" -----\n", | |
" hist: a list containing all the x-values usually plotted in a histogram we want to fit and find the peak of\n", | |
" log_dense: density model of the input data 'hist' (the output of the function above)\n", | |
" Returns:\n", | |
" --------\n", | |
" the x location of the peak, according to the fit\n", | |
" '''\n", | |
" return hist[np.argmax(log_dense)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def plotting_example(hist):\n", | |
" '''\n", | |
" quick example on how to use the functions above\n", | |
" '''\n", | |
" plt.hist(hist, histtype = 'stepfilled') # plot histogram\n", | |
" ixa = np.argsort(hist[:1000]) # sort histogram indices and only take first 1000 samples for the fit\n", | |
" log_dense_a = non_param_fit(hist[:1000]) # calculate log density\n", | |
" plt.plot(hist[:1000][ixa], np.exp(log_dense_a)[ixa], '-') # plot fit \n", | |
" plt.show()\n", | |
" print 'Peak at : {}'.format(get_peak(hist, log_dense_a))\n", | |
" return" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment