Skip to content

Instantly share code, notes, and snippets.

@michaelchughes
Created February 18, 2013 20:40
Show Gist options
  • Save michaelchughes/4980514 to your computer and use it in GitHub Desktop.
Save michaelchughes/4980514 to your computer and use it in GitHub Desktop.
Is Matlab faster than Python for Bayesian Machine Learning? This is benchmark code for comparing the computation of a multinomial-dirichlet marginal likelihood. Compare the function "calc_marg_lik" in DMMultLik.py with the Matlab function "DMMultLik.m". DMMultLik.py is a script that will compare both of these for Ntrial runs on a given problem.
function logp = DMMargLik( Nvec, alphvec )
logZ_lik = sum( gammaln( Nvec+alphvec ) ) - gammaln( sum(Nvec+alphvec) );
logZ_prior = sum( gammaln( alphvec) ) - gammaln( sum( alphvec ) );
logp = logZ_lik - logZ_prior;
end
'''
Benchmark code to time both python and matlab in computing
the Multinomial-Dirichlet marginal likelihood
Usage:
>> python DMMargLik.py
Example Output: (time to run 1000 trials of the computation, printed to stdout)
Matlab: 0.03850 sec
Python: 0.03629 sec
End-User Should Modify:
change the definition of "MLABcmd" string variable to point to the matlab
installed executable on the current system.
'''
import scipy.special as sp
import numpy as np
import time
import commands
# Change this line! to point to the matlab executable on your machine!
# e.g on the Brown CS network us
# MLABcmd = '/local/projects/matlab/R2011b/bin/matlab/'
MLABcmd = '/Applications/MATLAB_R2011b.app/bin/matlab'
MLABcmd += " -nodesktop -nodisplay -r "
MLABmsgend = 'mathworks.com.'
#################################### Function we want to benchmark
def calc_marg_lik( Nvec, alphvec ):
logZ_lik = np.sum( sp.gammaln( Nvec+alphvec ) ) \
- sp.gammaln( np.sum(Nvec+alphvec) )
logZ_prior = np.sum( sp.gammaln( alphvec) ) \
- sp.gammaln( np.sum( alphvec ) )
return logZ_lik - logZ_prior
#################################### Utility code to perform benchmarks
def gen_problem( Nobs, alpha, K, seed=42):
np.random.seed( seed )
pi = np.random.mtrand.dirichlet( alpha* np.ones(K) )
Nvec = np.random.mtrand.multinomial( Nobs, pi )
return Nvec, alpha*np.ones(K)
def test_python( Nvec, alphvec, Ntrial):
''' Execute Ntrial trials of the python computation, return time as float'''
tic= time.time()
for trial in xrange( Ntrial ):
mLik = calc_marg_lik( Nvec, alphvec )
toc = time.time()
return toc-tic
def test_matlab( Nvec, alphvec, Ntrial ):
myPreloadCall = "Nvec = %s;" % (np2mlabstr( Nvec) )
myPreloadCall += "alphvec = %s;" % (np2mlabstr( alphvec))
myPreloadCall += "DMMargLik( Nvec, alphvec );"
myFuncCall = "DMMargLik( Nvec, alphvec);"
myStatement = '"%s tic; for i = 1:%d; %s; end; t=toc; disp(t); exit;"' % (myPreloadCall, Ntrial, myFuncCall)
print 'Calling MATLAB executable with arg: \n', myStatement
status, output = commands.getstatusoutput( MLABcmd + myStatement );
if status is not 0:
raise ValueError( output )
output = parse_matlab_out( output )
return float(output)
#################################### Utility code to call matlab from python
def parse_matlab_out( output ):
output = output.split( MLABmsgend )[1]
for line in output.split('\n'):
line = line.strip()
if len(line)==0:
continue
return line
def np2mlabstr( xvec ):
return '[' + ' '.join( ['%d'%(x) for x in xvec] ) + ']'
#################################### Main Function (calls both Mlab & python)
K = 120
alpha = 1.0
Nobs = 12345
Ntrial = 1000
Nvec, alphvec = gen_problem( Nobs, alpha, K )
mtime = test_matlab( Nvec, alphvec, Ntrial )
ptime = test_python( Nvec, alphvec, Ntrial )
print 'Matlab: %.5f sec' % (mtime)
print 'Python: %.5f sec' % (ptime)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment