-
-
Save FaisalAl-Tameemi/d9f461f0bd901e70b21bd14f12e799ed to your computer and use it in GitHub Desktop.
Udacity: Machine Learning for Trading
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Working with multiple stocks | |
""" | |
SPY is used for reference - it's the market | |
Normalize by the first day's price to plot on "equal footing" | |
""" | |
import os | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; converted with ``str`` before formatting.
        base_dir: Directory that holds the per-symbol CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read adjusted-close prices for the given symbols from CSV files.

    SPY is always loaded as the market reference; when the caller did not
    list it, it is placed first. Rows for which SPY has no value are dropped.

    Args:
        symbols: Iterable of ticker symbols. It is copied, so the caller's
            list is never modified.
        dates: Index used to build the result frame.

    Returns:
        DataFrame with one column per symbol, restricted to the entries of
        ``dates`` on which SPY has data.
    """
    # Work on a copy: inserting SPY must not leak into the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def normalize_data(df):
    """Normalize prices by the first row so every column starts at 1.0.

    Args:
        df: DataFrame of prices; must contain at least one row.

    Returns:
        New DataFrame where each column is divided by its first value.
    """
    # .iloc replaces the .ix indexer, which has been removed from pandas.
    return df / df.iloc[0]
def plot_data(df, title="Stock prices"):
    """Plot the data with a title and fixed axis labels, then show the figure.

    Args:
        df: Object whose ``plot`` method returns the axes to label
            (the callers in this script pass a DataFrame).
        title: Title passed through to ``df.plot``.
    """
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()
def plot_selected(df, columns, start_index, end_index):
    """Plot the chosen columns, normalized, over the given index range.

    The whole frame is normalized by its first row before slicing, so the
    plotted values are relative to the first row of ``df``, not to the first
    row of the selected range.

    Args:
        df: DataFrame of prices.
        columns: Column labels to plot.
        start_index: First index label of the range.
        end_index: Last index label of the range.
    """
    df = normalize_data(df)
    # .loc is the label-based replacement for the removed .ix indexer.
    plot_data(df.loc[start_index:end_index, columns])
def test_run():
    """Load 2010 prices and plot normalized SPY and IBM for a one-month range."""
    # Define a date range
    dates = pd.date_range('2010-01-01', '2010-12-31')
    # Choose stock symbols to read
    symbols = ['GOOG', 'IBM', 'GLD']  # SPY will be added in get_data()
    # Get stock data
    df = get_data(symbols, dates)
    # Slice and plot
    plot_selected(df, ['SPY', 'IBM'], '2010-03-01', '2010-04-01')


if __name__ == "__main__":
    test_run()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Timing Python operations
import time

# Take a wall-clock timestamp before and after the operation being measured.
t1 = time.time()
# A parenthesized single-argument print is valid both as the Python 2
# statement and as the Python 3 function, and prints the same text.
print('Execute your function')
t2 = time.time()
print('The time taken by print statement is {} seconds'.format(t2 - t1))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Bollinger Bands.""" | |
import os | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; converted with ``str`` before formatting.
        base_dir: Directory that holds the per-symbol CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read adjusted-close prices for the given symbols from CSV files.

    SPY is always loaded as the market reference; when the caller did not
    list it, it is placed first. Rows for which SPY has no value are dropped.

    Args:
        symbols: Iterable of ticker symbols. It is copied, so the caller's
            list is never modified.
        dates: Index used to build the result frame.

    Returns:
        DataFrame with one column per symbol, restricted to the entries of
        ``dates`` on which SPY has data.
    """
    # Work on a copy: inserting SPY must not leak into the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices"):
    """Plot the data with a title and fixed axis labels, then show the figure.

    Args:
        df: Object whose ``plot`` method returns the axes to label.
        title: Title passed through to ``df.plot``.
    """
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()
def get_rolling_mean(values, window):
    """Return the rolling mean of the given values.

    Args:
        values: pandas Series or DataFrame to smooth.
        window: Number of consecutive observations in each window.

    Returns:
        Object of the same shape as ``values``; positions that do not yet
        have a full window are NaN.
    """
    # pd.rolling_mean() was removed from pandas; .rolling() is its replacement.
    return values.rolling(window=window).mean()
def get_rolling_std(values, window):
    """Return the rolling standard deviation of the given values.

    Args:
        values: pandas Series or DataFrame.
        window: Number of consecutive observations in each window.

    Returns:
        Object of the same shape as ``values``; positions that do not yet
        have a full window are NaN.
    """
    # pd.rolling_std() was removed from pandas; .rolling() is its replacement.
    return values.rolling(window=window).std()
def get_bollinger_bands(rm, rstd):
    """Return the upper and lower Bollinger Bands.

    Args:
        rm: Rolling mean.
        rstd: Rolling standard deviation.

    Returns:
        Tuple ``(upper_band, lower_band)``: the rolling mean plus and minus
        two rolling standard deviations.
    """
    upper_band = rm + 2 * rstd
    lower_band = rm - 2 * rstd
    return upper_band, lower_band
def test_run():
    """Plot 2012 SPY prices with a 20-row rolling mean and Bollinger Bands."""
    # Read data
    dates = pd.date_range('2012-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)

    # Compute Bollinger Bands
    # 1. Compute rolling mean
    rm_SPY = get_rolling_mean(df['SPY'], window=20)
    # 2. Compute rolling standard deviation
    rstd_SPY = get_rolling_std(df['SPY'], window=20)
    # 3. Compute upper and lower bands
    upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)

    # Plot raw SPY values, rolling mean and Bollinger Bands on the same axes
    ax = df['SPY'].plot(title="Bollinger Bands", label='SPY')
    rm_SPY.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()


if __name__ == "__main__":
    test_run()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Daily returns | |
daily_ret[t] = (price[t]/price[t-1]) - 1 | |
Cumulative returns | |
cumret[t] = (price[t]/price[0]) - 1 | |
""" | |
import os | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; converted with ``str`` before formatting.
        base_dir: Directory that holds the per-symbol CSV files.

    Returns:
        ``base_dir`` joined with ``"<symbol>.csv"``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read adjusted-close prices for the given symbols from CSV files.

    SPY is always loaded as the market reference; when the caller did not
    list it, it is placed first. Rows for which SPY has no value are dropped.

    Args:
        symbols: Iterable of ticker symbols. It is copied, so the caller's
            list is never modified.
        dates: Index used to build the result frame.

    Returns:
        DataFrame with one column per symbol, restricted to the entries of
        ``dates`` on which SPY has data.
    """
    # Work on a copy: inserting SPY must not leak into the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col='Date',
                              parse_dates=True, usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
def plot_data(df, title="Stock prices", xlabel="Date", ylabel="Price"):
    """Plot the data with a custom title and axis labels, then show the figure.

    Args:
        df: Object whose ``plot`` method returns the axes to label.
        title: Title passed through to ``df.plot``.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
    """
    ax = df.plot(title=title, fontsize=12)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    plt.show()
def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each value is the fractional change from the previous row, i.e.
    ``(df / df.shift(1)) - 1``. The input frame is not modified.

    Args:
        df: DataFrame of prices, one row per period.

    Returns:
        DataFrame of the same shape as ``df`` with the first row set to 0.
    """
    daily_returns = df.pct_change()
    # The first row has no previous value to compare with, so report it as
    # zero instead of NaN. An empty frame has no first row to overwrite.
    if len(daily_returns) > 0:
        # .iloc replaces the .ix indexer, which has been removed from pandas.
        daily_returns.iloc[0, :] = 0
    return daily_returns
def test_run():
    """Plot July 2012 prices for SPY and XOM, then their daily returns."""
    # Read data
    dates = pd.date_range('2012-07-01', '2012-07-31')  # one month only
    symbols = ['SPY', 'XOM']
    df = get_data(symbols, dates)
    plot_data(df)

    # Compute daily returns
    daily_returns = compute_daily_returns(df)
    plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")


if __name__ == "__main__":
    test_run()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Dealing with missing data: | |
1. Fill forward (to avoid peeking into the future) | |
2. Fill backward | |
""" | |
def fill_missing_values(df_data):
    """Fill missing values in the data frame, in place.

    Gaps are filled forward first, so later values are not used where an
    earlier one exists; whatever is still missing (leading gaps) is then
    filled backward.

    Args:
        df_data: DataFrame to fill; it is modified in place.

    Returns:
        The same ``df_data`` object, for convenience.
    """
    # ffill()/bfill() replace fillna(method=...), which pandas has deprecated.
    df_data.ffill(inplace=True)
    df_data.bfill(inplace=True)
    return df_data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
kurtosis (quantifies whether the shape of the data distribution matches the Gaussian distribution) | |
+ fat tails | |
- skinny tails | |
Scatterplots | |
slope (Beta): how reactive a stock is to the market - higher Beta means | |
the stock is more reactive to the market | |
NOTE: slope != correlation | |
correlation is a measure of how tightly the individual points fit the line
intercept (alpha): +ve --> the stock on avg is performing a little bit better | |
than the market | |
In many cases in financial research we assume the daily returns are normally distributed, | |
but this can be dangerous because it ignores kurtosis or the probability in the | |
tails. | |
""" | |
# NOTE(review): this fragment presumably continues the daily-returns script.
# It expects `df` (prices with SPY, XOM and GLD columns),
# `compute_daily_returns` and `plt` (matplotlib.pyplot) to be defined
# already -- confirm before running it standalone.
import numpy as np  # np.polyfit is used below

# Compute daily returns
daily_returns = compute_daily_returns(df)

# Plot a histogram
daily_returns.hist(bins=20)

# Get mean and standard deviation
mean = daily_returns['SPY'].mean()
std = daily_returns['SPY'].std()

# Dashed vertical lines at the mean (white) and at +std / -std (red)
plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
plt.show()

# Compute kurtosis (the value is only displayed in an interactive session)
daily_returns.kurtosis()

# Compute and plot two histograms on the same chart
daily_returns['SPY'].hist(bins=20, label='SPY')
daily_returns['XOM'].hist(bins=20, label='XOM')
plt.legend(loc='upper right')
plt.show()

# Scatterplots, each with a degree-1 polynomial fit: slope = beta, intercept = alpha
daily_returns.plot(kind='scatter', x='SPY', y='XOM')  # SPY vs XOM
beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1)
plt.plot(daily_returns['SPY'], beta_XOM * daily_returns['SPY'] + alpha_XOM, '-', color='r')
plt.show()

daily_returns.plot(kind='scatter', x='SPY', y='GLD')  # SPY vs GLD
beta_GLD, alpha_GLD = np.polyfit(daily_returns['SPY'], daily_returns['GLD'], 1)
plt.plot(daily_returns['SPY'], beta_GLD * daily_returns['SPY'] + alpha_GLD, '-', color='r')
plt.show()

# Calculate correlation coefficient (only displayed in an interactive session)
daily_returns.corr(method='pearson')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Daily Portfolio Value | |
Given: | |
start_val = 1000000 | |
start_date = 2009-01-01 | |
end-date = 2011-12-31 | |
symbols = ['SPY', 'XOM', 'GOOG', 'GLD'] | |
allocs = [0.4, 0.4, 0.1, 0.1] | |
Pseudo-algo:
start with prices df | |
normed = prices/prices[0] | |
alloced = normed*allocs | |
pos_vals = alloced*start_val # position values | |
port_val = pos_vals.sum(axis=1) | |
Portfolio Statistics | |
daily_rets = daily_rets[1:] # ignore 0 | |
4 key statistics:
1) cum_ret = (port_val[-1]/port_val[0])-1 # port_val == portfolio value | |
2) avg_daily_ret = daily_rets.mean() | |
3) std_daily_ret = daily_rets.std() # volatility | |
4) sharpe_ratio | |
SHARPE RATIO: risk adjusted return | |
All else being equal: | |
- lower risk is better | |
- higher return is better | |
SR also considers risk free rate of return | |
Rp - portfolio return | |
Rf - risk free rate of return (return rate on a savings account in a bank) | |
sigma_p - std dev of portfolio return | |
The form of Sharpe Ratio: (Rp - Rf) / sigma_p | |
The value of a portfolio is directly proportional to the return | |
it generates over some baseline (here risk-free rate), and inversely | |
proportional to its volatility. | |
SR = mean(daily_rets - daily_rf) / std(daily_rets) | |
Note: | |
a) mean is the expected value | |
b) std(daily_rets - daily_rf) == std(daily_rets) since daily_rf is a const | |
c) daily_rf == risk free rate | |
- LIBOR | |
- interest rate on 3 month T-bill | |
- 0% (value that's commonly been used in the past few years) - good approximation | |
to convert annual risk free rate into daily rate | |
e.g. annual rate 10% or 0.1 | |
then daily_rf = (1 + 0.1)**(1.0/252) - 1   # use 1.0/252: under Python 2, 1/252 is integer division and equals 0
SR can vary widely depending on how frequently you sample (e.g. you sample prices
every year/month/week/day)
The original version of SR is an annual measure; therefore, if we sample
at frequencies other than annual, we need to add an adjustment factor
SR_annualized = k * SR | |
where k = sqrt(no samples per year) | |
- daily k sqrt(252) | |
- weekly k sqrt(52) | |
- monthly k sqrt(12) | |
Finally the SR = sqrt(252) * mean(daily_rets - daily_rf) / std(daily_rets) | |
WARNING: use daily_rets.std() or np.std(daily_rets, ddof=1) | |
Pandas uses the unbiased estimator (N-1 in the denominator), whereas Numpy by default does not. See http://stackoverflow.com/questions/24984178/different-std-in-pandas-vs-numpy |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
How to use an optimizer: | |
1) Provide a function to optimize, e.g f(x) = x**2+4 | |
2) Provide an initial guess | |
3) Call the optimizer | |
import scipy.optimize as spo | |
min_result = spo.minimize(f, guess, method='SLSQP', options={'disp': True}) | |
print min_result.x, min_result.fun | |
Functions with multiple minima, any discontinuities or zero slope can be hard | |
to minimize. | |
Parameterized model | |
e.g. f(x) = mx + b <-- model with two parameters m, b | |
now we can use an optimizer to minimise the squared error | |
to find the line of best fit for the model given the data |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
What is portfolio optimization?
Given a set of assets and a time period, find an allocation of funds to
assets that maximizes performance.
What is performance? | |
We could choose from a number of metrics, including cumulative return, | |
volatility or risk, and risk adjusted return (Sharpe Ratio). | |
E.g cumulative return is the most trivial measure to use - simply investing all your money in the stock with maximum return (and none in others) would be your optimal portfolio, in this case. Hence, it is the easiest to solve for. But probably not the best for risk mitigation. | |
Framing the problem (optimise for Sharpe Ratio): | |
minimise f(X) = SR * -1 (we want to maximise the SR) | |
where X is the allocation vector, e.g. [.1, .4, .4, .1]
ranges: limits on values | |
0 <= X <= 1 | |
constraints: properties of X that must be 'true' | |
X.sum() = 1.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Market capitalization for a stock: # shares outstanding * price | |
ETFs have 4 or 3 letters | |
Mutual Funds usually have 5 letters | |
Hedge Funds don't have abbreviations | |
AUM - Assets Under Management - is the total amount of money being managed by the fund. | |
How fund managers are rewarded: | |
Expense ratio | |
is typically a percentage of AUM, therefore higher the AUM value, greater the incentive. | |
Two & Twenty | |
This structure actually motivates both AUM accumulation ("Two") as well as | |
Profits ("Twenty"). Here "Risk taking" is synonymous with aiming for greater | |
profits, which is motivated by the Two & Twenty model. | |
Hedge fund goals: | |
- beat a benchmark* (portfolio may go down with the market) | |
- absolute return (+ve returns no matter what; long/short positions) | |
Metrics: | |
- cumulative return | |
- volatility (std) | |
- risk/reward (Sharpe Ratio) | |
*select benchmark that represent the type of your investment. E.g. if you invest | |
in European stocks, use European stock index as the benchmark, not SPY. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Order: BUY,IBM,100,LIMIT,99.95 # BUY/SELL,stock,no shares,MARKET/LIMIT,price | |
* LIMIT is the max price you are willing to pay | |
then in the order book this will appear as (given this is the first order): | |
BID 99.95 100 | |
SELL order will be reflected as ASK in the order book. | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
What a company is worth? | |
INTRINSIC VALUE | |
is based on future dividends. In other words, companies pay a certain amount | |
to their investors every year based on how many shares they own. And this is the | |
value of all future dividends going into the future. | |
Future Value / Discount Rate | |
= Total dividends per year / DR | |
BOOK VALUE | |
is the value of the company if we split it up into pieces and sold those
individual pieces.
Total assets (ignoring intangible assets) minus liabilities | |
MARKET CAPITALIZATION
is the value the market is placing on the company. | |
No shares * price | |
Many stock trading strategies look for deviations between intrinsic value and market cap. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Interview with Tammer Kamel | |
Build strategy that is: | |
1) Theoretically sound | |
2) Empirically testable | |
3) Simple |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment