FaisalAl-Tameemi · September 18, 2016 22:18
diff --git a/01-02.py b/01-02.py
 # Working with multiple stocks

 """
 SPY is used for reference - it's the market
 Normalize by the first day's price to plot on "equal footing"
 """

 import os
 import pandas as pd
 import matplotlib.pyplot as plt


 def symbol_to_path(symbol, base_dir="data"):
     """Build the path of the CSV file that holds data for a ticker symbol.

     The file name is ``str(symbol)`` followed by ``.csv``, joined onto
     ``base_dir``.
     """
     csv_name = "{}.csv".format(str(symbol))
     return os.path.join(base_dir, csv_name)


 def get_data(symbols, dates):
     """Read adjusted-close prices for the given symbols from CSV files.

     Args:
         symbols: Iterable of ticker symbols. 'SPY' is put at the front as the
             reference column when absent; the caller's list is not modified.
         dates: Index (e.g. a ``pd.date_range``) for the returned frame.

     Returns:
         DataFrame with one column per symbol, keeping only the dates on
         which SPY has a value.
     """
     # Work on a copy so inserting SPY does not mutate the caller's list.
     symbols = list(symbols)
     if 'SPY' not in symbols:
         symbols.insert(0, 'SPY')

     df = pd.DataFrame(index=dates)
     for symbol in symbols:
         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                               parse_dates=True, usecols=['Date', 'Adj Close'],
                               na_values=['nan'])
         df_temp = df_temp.rename(columns={'Adj Close': symbol})
         df = df.join(df_temp)
         if symbol == 'SPY':  # drop dates SPY did not trade
             df = df.dropna(subset=['SPY'])

     return df


 def normalize_data(df):
     """Normalize prices by dividing every row by the first row.

     After normalization the first row is all 1.0, so the columns can be
     compared on an equal footing.
     """
     # .iloc selects the first row by position; the .ix indexer used
     # previously was removed in pandas 1.0.
     return df / df.iloc[0]


 def plot_data(df, title="Stock prices"):
     """Draw the frame as a chart titled ``title`` with Date/Price axes and show it."""
     axes = df.plot(fontsize=12, title=title)
     axes.set_xlabel("Date")
     axes.set_ylabel("Price")
     plt.show()
    

 def plot_selected(df, columns, start_index, end_index):
     """Plot the chosen columns over the index labels in the given range.

     The whole frame is normalized first, so values are relative to the
     first row of ``df`` rather than to ``start_index``.
     """
     df = normalize_data(df)
     # .loc performs the label-based slice; the .ix indexer used previously
     # was removed in pandas 1.0.
     plot_data(df.loc[start_index:end_index, columns])


 def test_run():
     """Plot normalized SPY and IBM prices between 2010-03-01 and 2010-04-01."""
     # Every calendar day of 2010 forms the index handed to get_data().
     year_2010 = pd.date_range('2010-01-01', '2010-12-31')

     # SPY is not listed here; get_data() adds it.
     tickers = ['GOOG', 'IBM', 'GLD']
     prices = get_data(tickers, year_2010)

     plot_selected(prices, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')


 if __name__ == "__main__":
     test_run()
diff --git a/01-03.py b/01-03.py
 # Timing Python operations

 import time

 # Take a timestamp before and after the statement being measured.
 t1 = time.time()
 # print() with a single argument behaves the same on Python 2 and Python 3,
 # whereas the print statement is a SyntaxError on Python 3.
 print('Execute your function')
 t2 = time.time()
 print('The time taken by print statement is {} seconds'.format(t2 - t1))
diff --git a/01-04-1.py b/01-04-1.py
 """Bollinger Bands."""

 import os
 import pandas as pd
 import matplotlib.pyplot as plt

 def symbol_to_path(symbol, base_dir="data"):
     """Return ``<base_dir>/<symbol>.csv`` for the given ticker symbol."""
     file_name = "{}.csv".format(str(symbol))
     csv_path = os.path.join(base_dir, file_name)
     return csv_path


 def get_data(symbols, dates):
     """Read adjusted-close prices for the given symbols from CSV files.

     Args:
         symbols: Iterable of ticker symbols. 'SPY' is put at the front as the
             reference column when absent; the caller's list is not modified.
         dates: Index (e.g. a ``pd.date_range``) for the returned frame.

     Returns:
         DataFrame with one column per symbol, keeping only the dates on
         which SPY has a value.
     """
     # Work on a copy so inserting SPY does not mutate the caller's list.
     symbols = list(symbols)
     if 'SPY' not in symbols:
         symbols.insert(0, 'SPY')

     df = pd.DataFrame(index=dates)
     for symbol in symbols:
         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                               parse_dates=True, usecols=['Date', 'Adj Close'],
                               na_values=['nan'])
         df_temp = df_temp.rename(columns={'Adj Close': symbol})
         df = df.join(df_temp)
         if symbol == 'SPY':  # drop dates SPY did not trade
             df = df.dropna(subset=['SPY'])

     return df


 def plot_data(df, title="Stock prices"):
     """Draw the frame as a chart titled ``title`` with Date/Price axes and show it."""
     chart = df.plot(fontsize=12, title=title)
     chart.set_xlabel("Date")
     chart.set_ylabel("Price")
     plt.show()


 def get_rolling_mean(values, window):
     """Return the rolling mean of ``values`` over ``window`` observations.

     ``values`` must be a pandas Series or DataFrame. Entries whose window
     is not yet full are NaN.
     """
     # pd.rolling_mean() was removed from pandas; the .rolling() accessor
     # is its replacement.
     return values.rolling(window=window).mean()


 def get_rolling_std(values, window):
     """Return the rolling standard deviation of ``values`` over ``window`` observations.

     ``values`` must be a pandas Series or DataFrame. Entries whose window
     is not yet full are NaN.
     """
     # pd.rolling_std() was removed from pandas; the .rolling() accessor
     # is its replacement.
     return values.rolling(window=window).std()


 def get_bollinger_bands(rm, rstd, num_std=2):
     """Return the upper and lower Bollinger Bands.

     Args:
         rm: Rolling mean.
         rstd: Rolling standard deviation.
         num_std: Number of standard deviations between the mean and each
             band. Defaults to 2, the width that was previously hard-coded.

     Returns:
         Tuple ``(upper_band, lower_band)``.
     """
     offset = num_std * rstd
     upper_band = rm + offset
     lower_band = rm - offset
     return upper_band, lower_band


 def test_run():
     """Plot 2012 SPY prices with their 20-day rolling mean and Bollinger Bands."""
     # Load SPY for every calendar day of 2012 and keep just that column.
     year_2012 = pd.date_range('2012-01-01', '2012-12-31')
     spy = get_data(['SPY'], year_2012)['SPY']

     # Rolling mean and rolling standard deviation feed the two bands.
     rolling_mean = get_rolling_mean(spy, window=20)
     rolling_std = get_rolling_std(spy, window=20)
     upper, lower = get_bollinger_bands(rolling_mean, rolling_std)

     # Raw prices go first so the other series share their axes.
     ax = spy.plot(title="Bollinger Bands", label='SPY')
     overlays = (
         (rolling_mean, 'Rolling mean'),
         (upper, 'upper band'),
         (lower, 'lower band'),
     )
     for series, label in overlays:
         series.plot(label=label, ax=ax)

     ax.set_xlabel("Date")
     ax.set_ylabel("Price")
     ax.legend(loc='upper left')
     plt.show()


 if __name__ == "__main__":
     test_run()
diff --git a/01-04-2.py b/01-04-2.py
 """
 Daily returns
 daily_ret[t] = (price[t]/price[t-1]) - 1

 Cumulative returns
 cumret[t] = (price[t]/price[0]) - 1
 """


 import os
 import pandas as pd
 import matplotlib.pyplot as plt

 def symbol_to_path(symbol, base_dir="data"):
     """Locate the CSV file for a ticker symbol inside ``base_dir``."""
     parts = (base_dir, "{}.csv".format(str(symbol)))
     return os.path.join(*parts)


 def get_data(symbols, dates):
     """Read adjusted-close prices for the given symbols from CSV files.

     Args:
         symbols: Iterable of ticker symbols. 'SPY' is put at the front as the
             reference column when absent; the caller's list is not modified.
         dates: Index (e.g. a ``pd.date_range``) for the returned frame.

     Returns:
         DataFrame with one column per symbol, keeping only the dates on
         which SPY has a value.
     """
     # Work on a copy so inserting SPY does not mutate the caller's list.
     symbols = list(symbols)
     if 'SPY' not in symbols:
         symbols.insert(0, 'SPY')

     df = pd.DataFrame(index=dates)
     for symbol in symbols:
         df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                               parse_dates=True, usecols=['Date', 'Adj Close'],
                               na_values=['nan'])
         df_temp = df_temp.rename(columns={'Adj Close': symbol})
         df = df.join(df_temp)
         if symbol == 'SPY':  # drop dates SPY did not trade
             df = df.dropna(subset=['SPY'])

     return df


 def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
     """Draw the frame as a chart with the given title and axis labels, then show it."""
     axes = df.plot(fontsize=12, title=title)
     axes.set_xlabel(xlabel)
     axes.set_ylabel(ylabel)
     plt.show()


 def compute_daily_returns(df):
     """Compute and return the daily return values.

     Each value is ``price[t] / price[t-1] - 1``. The first row has no
     previous day to compare with, so it is set to zero.
     """
     daily_returns = df.pct_change()
     # .iloc addresses the first row by position; the .ix indexer used
     # previously was removed in pandas 1.0. An empty frame has no first row.
     if len(daily_returns) > 0:
         daily_returns.iloc[0] = 0
     return daily_returns


 def test_run():
     """Plot July 2012 prices for SPY and XOM, then their daily returns."""
     # One month only.
     july_2012 = pd.date_range('2012-07-01', '2012-07-31')
     prices = get_data(['SPY', 'XOM'], july_2012)
     plot_data(prices)

     returns = compute_daily_returns(prices)
     plot_data(returns, title="Daily returns", ylabel="Daily returns")


 if __name__ == "__main__":
     test_run()
diff --git a/01-05.py b/01-05.py
 """
 Dealing with missing data:
 1. Fill forward (to avoid peeking into the future)
 2. Fill backward
 """

 def fill_missing_values(df_data):
     """Fill missing values in the data frame, in place, and return it.

     Gaps are filled forward first, so no value comes from the future.
     Anything still missing at the start is then filled backward.
     """
     # ffill()/bfill() replace fillna(method=...), which pandas has deprecated.
     df_data.ffill(inplace=True)
     df_data.bfill(inplace=True)
     return df_data
diff --git a/01-06.py b/01-06.py
 """
 kurtosis (quantifies whether the shape of the data distribution matches the Gaussian distribution)
  + fat tails
  - skinny tails
  
 Scatterplots
  slope (Beta): how reactive a stock is to the market - higher Beta means
 the stock is more reactive to the market

 NOTE: slope != correlation
 correlation is a measure of how tightly the individual points fit the line
  
  intercept (alpha): +ve --> the stock on avg is performing a little bit better
 than the market

 In many cases in financial research we assume the daily returns are normally distributed,
 but this can be dangerous because it ignores kurtosis or the probability in the
 tails.
 """

 # Compute daily returns
 # NOTE(review): compute_daily_returns, df, plt and np are not defined or
 # imported in this snippet; presumably they come from the earlier lesson
 # files - confirm before running it on its own.
 daily_returns = compute_daily_returns(df)

 # Plot a histogram
 daily_returns.hist(bins=20)

 # Get mean and standard deviation
 mean = daily_returns['SPY'].mean()
 std = daily_returns['SPY'].std()

 # Dashed vertical lines: the mean in white, +std and -std in red
 plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
 plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
 plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
 plt.show()

 # Compute kurtosis (the result is neither stored nor printed here)
 daily_returns.kurtosis()


 # Compute and plot two histograms on the same chart
 daily_returns['SPY'].hist(bins=20, label='SPY')
 daily_returns['XOM'].hist(bins=20, label='XOM')
 plt.legend(loc='upper right')
 plt.show()


 # Scatterplots
 # NOTE(review): no plt.show() follows the scatter plots in this snippet.
 daily_returns.plot(kind='scatter', x='SPY', y='XOM')  # SPY vs XOM
 beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1)  # fit poly degree 1
 # Overlay the fitted line beta*x + alpha in red
 plt.plot(daily_returns['SPY'], beta_XOM*daily_returns['SPY'] + alpha_XOM, '-', color='r')

 daily_returns.plot(kind='scatter', x='SPY', y='GLD')  # SPY vs GLD
 beta_GLD, alpha_GLD = np.polyfit(daily_returns['SPY'], daily_returns['GLD'], 1)  # fit poly degree 1
 # Overlay the fitted line beta*x + alpha in red
 plt.plot(daily_returns['SPY'], beta_GLD*daily_returns['SPY'] + alpha_GLD, '-', color='r')

 # Calculate correlation coefficient (the result is neither stored nor printed here)
 daily_returns.corr(method='pearson')
diff --git a/01-07.txt b/01-07.txt
 Daily Portfolio Value
 Given:
  start_val = 1000000
  start_date = 2009-01-01
  end_date = 2011-12-31
  symbols = ['SPY', 'XOM', 'GOOG', 'GLD']
  allocs = [0.4, 0.4, 0.1, 0.1]
  
 Pseudo-algo:
  start with prices df
  normed = prices/prices[0]
  alloced = normed*allocs
  pos_vals = alloced*start_val  # position values
  port_val = pos_vals.sum(axis=1)


 Portfolio Statistics
 daily_rets = daily_rets[1:]  # ignore 0

 4 key statistics:
 1) cum_ret = (port_val[-1]/port_val[0])-1  # port_val == portfolio value
 2) avg_daily_ret = daily_rets.mean()
 3) std_daily_ret = daily_rets.std()  # volatility
 4) sharpe_ratio


 SHARPE RATIO: risk adjusted return
 All else being equal:
 - lower risk is better
 - higher return is better
 SR also considers risk free rate of return

 Rp - portfolio return
 Rf - risk free rate of return (return rate on a savings account in a bank)
 sigma_p - std dev of portfolio return

 The form of Sharpe Ratio: (Rp - Rf) / sigma_p

 The value of a portfolio is directly proportional to the return
 it generates over some baseline (here risk-free rate), and inversely
 proportional to its volatility.

 SR = mean(daily_rets - daily_rf) / std(daily_rets)

 Note:
 a) mean is the expected value
 b) std(daily_rets - daily_rf) == std(daily_rets) since daily_rf is a const
 c) daily_rf == risk free rate
   - LIBOR
   - interest rate on 3 month T-bill
   - 0% (value that's commonly been used in the past few years) - good approximation
   
   to convert annual risk free rate into daily rate
   e.g. annual rate 10% or 0.1
   then daily_rf = (1 + 0.1)**(1.0/252) - 1  # 1.0/252, because 1/252 is integer division (== 0) in Python 2
   
   
 SR can vary widely depending on how frequently you sample (e.g. you sample prices
 every year/month/week/day)
 Original version of SR is that it's an annual measure, therefore if we sample
 at frequencies other than annual we need to add an adjustment factor
 SR_annualized = k * SR
 where k = sqrt(no samples per year)
  - daily k sqrt(252)
  - weekly k sqrt(52)
  - monthly k sqrt(12)
  
 Finally the SR = sqrt(252) * mean(daily_rets - daily_rf) / std(daily_rets)

 WARNING: use daily_rets.std() or np.std(daily_rets, ddof=1)
 Pandas uses the unbiased estimator (N-1 in the denominator), whereas Numpy by default does not. See http://stackoverflow.com/questions/24984178/different-std-in-pandas-vs-numpy
diff --git a/01-08.txt b/01-08.txt
 How to use an optimizer:
 1) Provide a function to optimize, e.g f(x) = x**2+4
 2) Provide an initial guess
 3) Call the optimizer
  import scipy.optimize as spo
  min_result = spo.minimize(f, guess, method='SLSQP', options={'disp': True})
  print min_result.x, min_result.fun

 Functions with multiple minima, any discontinuities or zero slope can be hard
 to minimize.

 Parameterized model
 e.g. f(x) = mx + b <-- model with two parameters m, b
     now we can use an optimizer to minimise the squared error
     to find the line of best fit for the model given the data
diff --git a/01-09.txt b/01-09.txt
 What is portfolio optimization?
  Given a set of assets and a time period, find an allocation of funds to
 assets that maximizes performance.

 What is performance?
  We could choose from a number of metrics, including cumulative return,
 volatility or risk, and risk adjusted return (Sharpe Ratio).

 E.g cumulative return is the most trivial measure to use - simply investing all your money in the stock with maximum return (and none in others) would be your optimal portfolio, in this case. Hence, it is the easiest to solve for. But probably not the best for risk mitigation.


 Framing the problem (optimise for Sharpe Ratio):
  minimise f(X) = SR * -1 (we want to maximise the SR)
  where X is the allocation vector eg [.1, .4, .4, .1]
  
  ranges: limits on values
    0 <= X <= 1
  
  constraints: properties of X that must be 'true'
    X.sum() = 1.0
diff --git a/02-01.txt b/02-01.txt
 Market capitalization for a stock: # shares outstanding * price

 ETFs have 4 or 3 letters
 Mutual Funds usually have 5 letters
 Hedge Funds don't have abbreviations

 AUM - Assets Under Management - is the total amount of money being managed by the fund.

 How fund managers are rewarded:
 Expense ratio
  is typically a percentage of AUM, therefore higher the AUM value, greater the incentive.
 Two & Twenty
  This structure actually motivates both AUM accumulation ("Two") as well as
 Profits ("Twenty"). Here "Risk taking" is synonymous with aiming for greater
 profits, which is motivated by the Two & Twenty model.


 Hedge fund goals:
  - beat a benchmark* (portfolio may go down with the market)
  - absolute return (+ve returns no matter what; long/short positions)
 Metrics:
  - cumulative return
  - volatility (std)
  - risk/reward (Sharpe Ratio)
  
 *select benchmark that represent the type of your investment. E.g. if you invest
 in European stocks, use European stock index as the benchmark, not SPY.
diff --git a/02-02.txt b/02-02.txt
 Order: BUY,IBM,100,LIMIT,99.95  # BUY/SELL,stock,no shares,MARKET/LIMIT,price
 * LIMIT is the max price you are willing to pay

 then in the order book this will appear as (given this is the first order):
 BID 99.95 100

 SELL order will be reflected as ASK in the order book.

diff --git a/02-03.txt b/02-03.txt
 What a company is worth?
  INTRINSIC VALUE
    is based on future dividends. In other words, companies pay a certain amount
 to their investors every year based on how many shares they own. And this is the
 value of all future dividends going into the future.
    Future Value / Discount Rate
    = Total dividends per year / DR

  BOOK VALUE
    is the value of the company if we split it up into pieces and sold those
 individual pieces.
    Total assets (ignoring intangible assets) minus liabilities

  MARKET CAPITALIZATION
    is the value the market is placing on the company.
    No shares * price
    

 Many stock trading strategies look for deviations between intrinsic value and market cap.
diff --git a/04-01.txt b/04-01.txt
 Interview with Tammer Kamel

 Build strategy that is:
 1) Theoretically sound
 2) Empirically testable
 3) Simple
diff --git a/resources.md b/resources.md
	# Working with multiple stocks

	"""
	SPY is used for reference - it's the market
	Normalize by the first day's price to plot on "equal footing"
	"""

	import os
	import pandas as pd
	import matplotlib.pyplot as plt


	def symbol_to_path(symbol, base_dir="data"):
	    """Return CSV file path given ticker symbol."""
	    # Body indentation restored; without it the listing is not valid Python.
	    return os.path.join(base_dir, "{}.csv".format(str(symbol)))


	def get_data(symbols, dates):
	    """Read stock data (adjusted close) for given symbols from CSV files.

	    'SPY' is put at the front as the reference column when absent; the
	    caller's list is not modified. Only dates on which SPY has a value
	    are kept.
	    """
	    # Work on a copy so inserting SPY does not mutate the caller's list.
	    symbols = list(symbols)
	    if 'SPY' not in symbols:
	        symbols.insert(0, 'SPY')

	    df = pd.DataFrame(index=dates)
	    for symbol in symbols:
	        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
	                              parse_dates=True, usecols=['Date', 'Adj Close'],
	                              na_values=['nan'])
	        df_temp = df_temp.rename(columns={'Adj Close': symbol})
	        df = df.join(df_temp)
	        if symbol == 'SPY':  # drop dates SPY did not trade
	            df = df.dropna(subset=["SPY"])

	    return df


	def normalize_data(df):
	    """Normalize stock prices using the first row of the dataframe."""
	    # .iloc selects the first row by position; the .ix indexer used
	    # previously was removed in pandas 1.0.
	    return df / df.iloc[0]


	def plot_data(df, title="Stock prices"):
	    """Plot stock prices with a custom title and meaningful axis labels."""
	    # Body indentation restored; without it the listing is not valid Python.
	    ax = df.plot(title=title, fontsize=12)
	    ax.set_xlabel("Date")
	    ax.set_ylabel("Price")
	    plt.show()


	def plot_selected(df, columns, start_index, end_index):
	    """Plot the desired columns over index values in the given range.

	    The whole frame is normalized first, so values are relative to the
	    first row of ``df`` rather than to ``start_index``.
	    """
	    df = normalize_data(df)
	    # .loc performs the label-based slice; the .ix indexer used previously
	    # was removed in pandas 1.0.
	    plot_data(df.loc[start_index:end_index, columns])


	def test_run():
	    """Plot normalized SPY and IBM prices between 2010-03-01 and 2010-04-01."""
	    # Body indentation restored; without it the listing is not valid Python.
	    # Define a date range
	    dates = pd.date_range('2010-01-01', '2010-12-31')

	    # Choose stock symbols to read
	    symbols = ['GOOG', 'IBM', 'GLD']  # SPY will be added in get_data()

	    # Get stock data
	    df = get_data(symbols, dates)

	    # Slice and plot
	    plot_selected(df, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')


	if __name__ == "__main__":
	    test_run()
	# Timing Python operations

	import time

	# Take a timestamp before and after the statement being measured.
	t1 = time.time()
	# print() with a single argument behaves the same on Python 2 and Python 3,
	# whereas the print statement is a SyntaxError on Python 3.
	print('Execute your function')
	t2 = time.time()
	print('The time taken by print statement is {} seconds'.format(t2 - t1))
	"""Bollinger Bands."""

	import os
	import pandas as pd
	import matplotlib.pyplot as plt

	def symbol_to_path(symbol, base_dir="data"):
	    """Return CSV file path given ticker symbol."""
	    # Body indentation restored; without it the listing is not valid Python.
	    return os.path.join(base_dir, "{}.csv".format(str(symbol)))


	def get_data(symbols, dates):
	    """Read stock data (adjusted close) for given symbols from CSV files.

	    'SPY' is put at the front as the reference column when absent; the
	    caller's list is not modified. Only dates on which SPY has a value
	    are kept.
	    """
	    # Work on a copy so inserting SPY does not mutate the caller's list.
	    symbols = list(symbols)
	    if 'SPY' not in symbols:
	        symbols.insert(0, 'SPY')

	    df = pd.DataFrame(index=dates)
	    for symbol in symbols:
	        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
	                              parse_dates=True, usecols=['Date', 'Adj Close'],
	                              na_values=['nan'])
	        df_temp = df_temp.rename(columns={'Adj Close': symbol})
	        df = df.join(df_temp)
	        if symbol == 'SPY':  # drop dates SPY did not trade
	            df = df.dropna(subset=["SPY"])

	    return df


	def plot_data(df, title="Stock prices"):
	    """Plot stock prices with a custom title and meaningful axis labels."""
	    # Body indentation restored; without it the listing is not valid Python.
	    ax = df.plot(title=title, fontsize=12)
	    ax.set_xlabel("Date")
	    ax.set_ylabel("Price")
	    plt.show()


	def get_rolling_mean(values, window):
	    """Return rolling mean of given values, using specified window size.

	    ``values`` must be a pandas Series or DataFrame.
	    """
	    # pd.rolling_mean() was removed from pandas; the .rolling() accessor
	    # is its replacement.
	    return values.rolling(window=window).mean()


	def get_rolling_std(values, window):
	    """Return rolling standard deviation of given values, using specified window size.

	    ``values`` must be a pandas Series or DataFrame.
	    """
	    # pd.rolling_std() was removed from pandas; the .rolling() accessor
	    # is its replacement.
	    return values.rolling(window=window).std()


	def get_bollinger_bands(rm, rstd, num_std=2):
	    """Return upper and lower Bollinger Bands.

	    ``num_std`` is the number of standard deviations between the rolling
	    mean and each band; it defaults to 2, the previously hard-coded width.
	    """
	    offset = num_std * rstd
	    upper_band = rm + offset
	    lower_band = rm - offset
	    return upper_band, lower_band


	def test_run():
	    """Plot 2012 SPY prices with their 20-day rolling mean and Bollinger Bands."""
	    # Body indentation restored; without it the listing is not valid Python.
	    # Read data
	    dates = pd.date_range('2012-01-01', '2012-12-31')
	    symbols = ['SPY']
	    df = get_data(symbols, dates)

	    # Compute Bollinger Bands
	    # 1. Compute rolling mean
	    rm_SPY = get_rolling_mean(df['SPY'], window=20)

	    # 2. Compute rolling standard deviation
	    rstd_SPY = get_rolling_std(df['SPY'], window=20)

	    # 3. Compute upper and lower bands
	    upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)

	    # Plot raw SPY values, rolling mean and Bollinger Bands
	    ax = df['SPY'].plot(title="Bollinger Bands", label='SPY')
	    rm_SPY.plot(label='Rolling mean', ax=ax)
	    upper_band.plot(label='upper band', ax=ax)
	    lower_band.plot(label='lower band', ax=ax)

	    # Add axis labels and legend
	    ax.set_xlabel("Date")
	    ax.set_ylabel("Price")
	    ax.legend(loc='upper left')
	    plt.show()


	if __name__ == "__main__":
	    test_run()
	"""
	Daily returns
	daily_ret[t] = (price[t]/price[t-1]) - 1

	Cumulative returns
	cumret[t] = (price[t]/price[0]) - 1
	"""


	import os
	import pandas as pd
	import matplotlib.pyplot as plt

	def symbol_to_path(symbol, base_dir="data"):
	    """Return CSV file path given ticker symbol."""
	    # Body indentation restored; without it the listing is not valid Python.
	    return os.path.join(base_dir, "{}.csv".format(str(symbol)))


	def get_data(symbols, dates):
	    """Read stock data (adjusted close) for given symbols from CSV files.

	    'SPY' is put at the front as the reference column when absent; the
	    caller's list is not modified. Only dates on which SPY has a value
	    are kept.
	    """
	    # Work on a copy so inserting SPY does not mutate the caller's list.
	    symbols = list(symbols)
	    if 'SPY' not in symbols:
	        symbols.insert(0, 'SPY')

	    df = pd.DataFrame(index=dates)
	    for symbol in symbols:
	        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
	                              parse_dates=True, usecols=['Date', 'Adj Close'],
	                              na_values=['nan'])
	        df_temp = df_temp.rename(columns={'Adj Close': symbol})
	        df = df.join(df_temp)
	        if symbol == 'SPY':  # drop dates SPY did not trade
	            df = df.dropna(subset=["SPY"])

	    return df


	def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
	    """Plot stock prices with a custom title and meaningful axis labels."""
	    # Body indentation restored; without it the listing is not valid Python.
	    ax = df.plot(title=title, fontsize=12)
	    ax.set_xlabel(xlabel)
	    ax.set_ylabel(ylabel)
	    plt.show()


	def compute_daily_returns(df):
	    """Compute and return the daily return values.

	    Each value is ``price[t] / price[t-1] - 1``. The first row has no
	    previous day to compare with, so it is set to zero.
	    """
	    daily_returns = df.pct_change()
	    # .iloc addresses the first row by position; the .ix indexer used
	    # previously was removed in pandas 1.0. An empty frame has no first row.
	    if len(daily_returns) > 0:
	        daily_returns.iloc[0] = 0
	    return daily_returns


	def test_run():
	    """Plot July 2012 prices for SPY and XOM, then their daily returns."""
	    # Body indentation restored; without it the listing is not valid Python.
	    # Read data
	    dates = pd.date_range('2012-07-01', '2012-07-31')  # one month only
	    symbols = ['SPY', 'XOM']
	    df = get_data(symbols, dates)
	    plot_data(df)

	    # Compute daily returns
	    daily_returns = compute_daily_returns(df)
	    plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")


	if __name__ == "__main__":
	    test_run()
	"""
	Dealing with missing data:
	1. Fill forward (to avoid peeking into the future)
	2. Fill backward
	"""

	def fill_missing_values(df_data):
	    """Fill missing values in data frame, in place, and return it.

	    Gaps are filled forward first, so no value comes from the future.
	    Anything still missing at the start is then filled backward.
	    """
	    # ffill()/bfill() replace fillna(method=...), which pandas has deprecated.
	    df_data.ffill(inplace=True)
	    df_data.bfill(inplace=True)
	    return df_data
	"""
	kurtosis (quantifies whether the shape of the data distribution matches the Gaussian distribution)
	+ fat tails
	- skinny tails

	Scatterplots
	slope (Beta): how reactive a stock is to the market - higher Beta means
	the stock is more reactive to the market

	NOTE: slope != correlation
	correlation is a measure of how tightly the individual points fit the line

	intercept (alpha): +ve --> the stock on avg is performing a little bit better
	than the market

	In many cases in financial research we assume the daily returns are normally distributed,
	but this can be dangerous because it ignores kurtosis or the probability in the
	tails.
	"""

	# Compute daily returns
	# NOTE(review): compute_daily_returns, df, plt and np are not defined or
	# imported in this snippet; presumably they come from the earlier lesson
	# listings - confirm before running it on its own.
	daily_returns = compute_daily_returns(df)

	# Plot a histogram
	daily_returns.hist(bins=20)

	# Get mean and standard deviation
	mean = daily_returns['SPY'].mean()
	std = daily_returns['SPY'].std()

	# Dashed vertical lines: the mean in white, +std and -std in red
	plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
	plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
	plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
	plt.show()

	# Compute kurtosis (the result is neither stored nor printed here)
	daily_returns.kurtosis()


	# Compute and plot two histograms on the same chart
	daily_returns['SPY'].hist(bins=20, label='SPY')
	daily_returns['XOM'].hist(bins=20, label='XOM')
	plt.legend(loc='upper right')
	plt.show()


	# Scatterplots
	# NOTE(review): no plt.show() follows the scatter plots in this snippet.
	daily_returns.plot(kind='scatter', x='SPY', y='XOM') # SPY vs XOM
	beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1) # fit poly degree 1
	# Overlay the fitted line beta*x + alpha in red
	plt.plot(daily_returns['SPY'], beta_XOM*daily_returns['SPY'] + alpha_XOM, '-', color='r')

	daily_returns.plot(kind='scatter', x='SPY', y='GLD') # SPY vs GLD
	beta_GLD, alpha_GLD = np.polyfit(daily_returns['SPY'], daily_returns['GLD'], 1) # fit poly degree 1
	# Overlay the fitted line beta*x + alpha in red
	plt.plot(daily_returns['SPY'], beta_GLD*daily_returns['SPY'] + alpha_GLD, '-', color='r')

	# Calculate correlation coefficient (the result is neither stored nor printed here)
	daily_returns.corr(method='pearson')
	Daily Portfolio Value
	Given:
	start_val = 1000000
	start_date = 2009-01-01
	end_date = 2011-12-31
	symbols = ['SPY', 'XOM', 'GOOG', 'GLD']
	allocs = [0.4, 0.4, 0.1, 0.1]

	Pseudo-algo:
	start with prices df
	normed = prices/prices[0]
	alloced = normed*allocs
	pos_vals = alloced*start_val # position values
	port_val = pos_vals.sum(axis=1)


	Portfolio Statistics
	daily_rets = daily_rets[1:] # ignore 0

	4 key statistics:
	1) cum_ret = (port_val[-1]/port_val[0])-1 # port_val == portfolio value
	2) avg_daily_ret = daily_rets.mean()
	3) std_daily_ret = daily_rets.std() # volatility
	4) sharpe_ratio


	SHARPE RATIO: risk adjusted return
	All else being equal:
	- lower risk is better
	- higher return is better
	SR also considers risk free rate of return

	Rp - portfolio return
	Rf - risk free rate of return (return rate on a savings account in a bank)
	sigma_p - std dev of portfolio return

	The form of Sharpe Ratio: (Rp - Rf) / sigma_p

	The value of a portfolio is directly proportional to the return
	it generates over some baseline (here risk-free rate), and inversely
	proportional to its volatility.

	SR = mean(daily_rets - daily_rf) / std(daily_rets)

	Note:
	a) mean is the expected value
	b) std(daily_rets - daily_rf) == std(daily_rets) since daily_rf is a const
	c) daily_rf == risk free rate
	- LIBOR
	- interest rate on 3 month T-bill
	- 0% (value that's commonly been used in the past few years) - good approximation

	to convert annual risk free rate into daily rate
	e.g. annual rate 10% or 0.1
	then daily_rf = (1 + 0.1)**(1.0/252) - 1  # 1.0/252, because 1/252 is integer division (== 0) in Python 2


	SR can vary widely depending on how frequently you sample (e.g. you sample prices
	every year/month/week/day)
	Original version of SR is that it's an annual measure, therefore if we sample
	at frequencies other than annual we need to add an adjustment factor
	SR_annualized = k * SR
	where k = sqrt(no samples per year)
	- daily k sqrt(252)
	- weekly k sqrt(52)
	- monthly k sqrt(12)

	Finally the SR = sqrt(252) * mean(daily_rets - daily_rf) / std(daily_rets)

	WARNING: use daily_rets.std() or np.std(daily_rets, ddof=1)
	Pandas uses the unbiased estimator (N-1 in the denominator), whereas Numpy by default does not. See http://stackoverflow.com/questions/24984178/different-std-in-pandas-vs-numpy
	How to use an optimizer:
	1) Provide a function to optimize, e.g f(x) = x**2+4
	2) Provide an initial guess
	3) Call the optimizer
	import scipy.optimize as spo
	min_result = spo.minimize(f, guess, method='SLSQP', options={'disp': True})
	print min_result.x, min_result.fun

	Functions with multiple minima, any discontinuities or zero slope can be hard
	to minimize.

	Parameterized model
	e.g. f(x) = mx + b <-- model with two parameters m, b
	now we can use an optimizer to minimise the squared error
	to find the line of best fit for the model given the data
	What is portfolio optimization?
	Given a set of assets and a time period, find an allocation of funds to
	assets that maximizes performance.

	What is performance?
	We could choose from a number of metrics, including cumulative return,
	volatility or risk, and risk adjusted return (Sharpe Ratio).

	E.g cumulative return is the most trivial measure to use - simply investing all your money in the stock with maximum return (and none in others) would be your optimal portfolio, in this case. Hence, it is the easiest to solve for. But probably not the best for risk mitigation.


	Framing the problem (optimise for Sharpe Ratio):
	minimise f(X) = SR * -1 (we want to maximise the SR)
	where X is the allocation vector eg [.1, .4, .4, .1]

	ranges: limits on values
	0 <= X <= 1

	constraints: properties of X that must be 'true'
	X.sum() = 1.0
	Market capitalization for a stock: # shares outstanding * price

	ETFs have 4 or 3 letters
	Mutual Funds usually have 5 letters
	Hedge Funds don't have abbreviations

	AUM - Assets Under Management - is the total amount of money being managed by the fund.

	How fund managers are rewarded:
	Expense ratio
	is typically a percentage of AUM, therefore higher the AUM value, greater the incentive.
	Two & Twenty
	This structure actually motivates both AUM accumulation ("Two") as well as
	Profits ("Twenty"). Here "Risk taking" is synonymous with aiming for greater
	profits, which is motivated by the Two & Twenty model.


	Hedge fund goals:
	- beat a benchmark* (portfolio may go down with the market)
	- absolute return (+ve returns no matter what; long/short positions)
	Metrics:
	- cumulative return
	- volatility (std)
	- risk/reward (Sharpe Ratio)

	*select benchmark that represent the type of your investment. E.g. if you invest
	in European stocks, use European stock index as the benchmark, not SPY.