Skip to content

Instantly share code, notes, and snippets.

@erikbern
Created April 27, 2016 02:10
Show Gist options
  • Save erikbern/a31f9bf7ea896b5d5c286ad13155e8c1 to your computer and use it in GitHub Desktop.
Save erikbern/a31f9bf7ea896b5d5c286ad13155e8c1 to your computer and use it in GitHub Desktop.
import urllib2, csv
import matplotlib.pyplot as plt
import datetime
import seaborn
import numpy, scipy.stats, math
# Download the S&P 500 history and build a total-return index (`reinvested`)
# with one entry per CSV row, alongside the matching `dates`.
f = urllib2.urlopen('https://raw.githubusercontent.com/datasets/s-and-p-500/master/data/data.csv')
dates = []
reinvested = []
last = None   # index level of the previous row; None until the first row is read
total = 1.0   # running total-return index, normalised to 1.0 at the first row
try:
    # Use a separate name for the reader so the `csv` module is not shadowed.
    reader = csv.reader(f)
    next(reader)  # headers
    for line in reader:
        date, value, dividends = line[:3]
        if date == '2016-04-01':
            # Rows from this date onwards are not used.
            break
        date = datetime.date(*map(int, date.split('-')))
        value = float(value)
        dividends = float(dividends)
        if last is not None:
            # Price change relative to the previous row.
            sp_yield = value / last
            # The dividend column is divided by 12, i.e. treated as an annual
            # figure spread over monthly rows (assumption -- confirm against
            # the dataset description).
            dv_yield = dividends / last / 12
            total *= (sp_yield + dv_yield)
        last = value
        reinvested.append(total)
        dates.append(date)
finally:
    # Always release the HTTP response, even if a row fails to parse.
    f.close()
# Draw the total-return index on a logarithmic y axis and save it as a PNG.
axes = plt.gca()
axes.plot(dates, reinvested)
axes.set_yscale('log')
axes.set_title('S&P 500 total return')
axes.set_ylabel('Index (1870: 1.0)')
plt.gcf().savefig('sp500_return.png')
# For every possible starting row, compute the internal rate of return of
# (a) a single up-front investment and (b) equal contributions in every
# period of the window, both cashed out at the end of the window.
n_years = 5
interval = 12 * n_years
lump_returns = []
dcav_returns = []
for offset in xrange(len(reinvested) - interval):
    end_level = reinvested[offset + interval]
    window = reinvested[offset:offset + interval]

    # Lump sum: pay 1 at the start, nothing in between, collect the growth
    # of that single unit at the end.
    lump_flows = [-1.0]
    lump_flows.extend([0.0] * (interval - 1))
    lump_flows.append(end_level / reinvested[offset])

    # Dollar cost averaging: pay 1 in each period of the window; the final
    # payout is the sum of what each of those units has grown to.
    dcav_flows = [-1.0] * interval
    dcav_flows.append(sum(end_level / level for level in window))

    # Per-period IRR scaled by 12 and expressed in percent.
    lump_returns.append(numpy.irr(lump_flows) * 12 * 100)
    dcav_returns.append(numpy.irr(dcav_flows) * 12 * 100)
# Flag each window as 1 when the strategy's return was positive, else 0.
lump_gain = [int(ret > 0) for ret in lump_returns]
dcav_gain = [int(ret > 0) for ret in dcav_returns]
# Print the share of windows with a positive return for each strategy.
# Convert to float before dividing: with two ints, Python 2's `/` truncates
# and the share would always print as 0 (or 1).
print(float(sum(lump_gain)) / len(lump_gain))
print(float(sum(dcav_gain)) / len(dcav_gain))
# Two-sample t-test on the 0/1 indicators of the two strategies.
print(scipy.stats.ttest_ind(lump_gain, dcav_gain))
# Overlay the two return distributions on shared bins and save the chart.
plt.clf()  # start from an empty figure
# Integer bounds enclosing every return of both strategies: round the minimum
# down and the maximum up.
amin = int(math.floor(numpy.amin((lump_returns, dcav_returns))))
amax = int(math.ceil(numpy.amax((lump_returns, dcav_returns))))
# Bin edges every 2 percentage points.  `range` excludes its stop value, so
# extend it by one step; otherwise the last edge can fall below the largest
# return and those observations are left out of the histogram.
bins = range(amin, amax + 2, 2)
seaborn.distplot(lump_returns, label='Lump investment returns (mean=%.2f%%)' % numpy.mean(lump_returns), bins=bins)
seaborn.distplot(dcav_returns, label='Dollar cost averaging returns (mean=%.2f%%)' % numpy.mean(dcav_returns), bins=bins)
# Paired signed-rank test: both lists hold one return per starting offset.
s, p = scipy.stats.wilcoxon(lump_returns, dcav_returns)
plt.title('Lump vs dollar cost returns for a %d year horizon (p=%f)' % (n_years, p))
plt.legend(loc=2)
plt.ylabel('Probability')
plt.xlabel('Annual return over %d years (%%)' % n_years)
plt.savefig('lump_vs_dcav.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment