portfolio_opimization
import requests
import pandas as pd
import time
import json
import numpy as np
import scipy as sp
import scipy.stats  # makes sp.stats available for the Dirichlet sampler below
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import cvxopt as opt
import cvxopt.solvers as optsolvers
import warnings
import arviz as az
def tangency_portfolio(cov_mat, exp_rets):
    n = len(cov_mat)

    # Quadratic objective: minimise (1/2) y' P y + q' y
    P = opt.matrix(cov_mat)
    q = opt.matrix(0.0, (n, 1))

    # Equality constraint A y = b (expected return normalised to 1)
    A_np = np.zeros((1, n))
    A_np[0, :] = exp_rets
    A = opt.matrix(A_np)
    b = opt.matrix(1.0)

    # Inequality constraints G y <= h (long-only: y >= 0)
    G_np = np.identity(n) * -1
    h_np = np.zeros((n, 1))
    G = opt.matrix(G_np)
    h = opt.matrix(h_np)

    # Solve
    optsolvers.options['show_progress'] = False
    sol = optsolvers.qp(P, q, G, h, A, b)

    if sol['status'] != 'optimal':
        warnings.warn("Convergence problem")

    # Put weights into a series
    weights = pd.Series(np.array(sol['x']).ravel())

    # Rescale weights so that sum(weights) = 1
    weights /= weights.sum()
    return weights
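# The QP above is the standard trick for the long-only tangency (max-Sharpe,
# zero risk-free rate) portfolio: minimise variance subject to the expected
# return being normalised to 1, then rescale the solution to sum to 1.
#
# A minimal usage sketch (illustrative only -- `demo_returns` and `demo_weights`
# are made-up names on synthetic data, not part of the original script):
#
#   demo_returns = pd.DataFrame(np.random.default_rng(0).normal(0.0005, 0.01, (250, 4)))
#   demo_weights = tangency_portfolio(demo_returns.cov().values,
#                                     demo_returns.mean().values * 100)
#   assert np.isclose(demo_weights.sum(), 1.0)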
def sample_desirable_weights(N, p):
    alphas = np.ones((p,))
    weights = np.abs(sp.stats.dirichlet.rvs(alphas, size=N))
    return weights
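# A flat Dirichlet (all alphas = 1) draws weight vectors uniformly from the
# simplex, so every sample is non-negative and sums to 1 -- i.e. a long-only,
# fully invested "desirable" portfolio. The np.abs is a no-op kept for safety.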
def compute_mean_variances(weights, exp_rets, cov):
    means = weights @ exp_rets
    variances = np.diag(weights @ cov @ weights.T)
    return means, variances

def standardised_mean_var(means, vars):
    std_mean = (means - means.mean()) / means.std()
    std_var = (vars - vars.mean()) / vars.std()
    return std_mean, std_var

def compute_optimal_mean_var_weights(exp_rets, cov):
    tang_weights = np.array([w for _, w in tangency_portfolio(cov, exp_rets).items()]).reshape((1, -1))
    tang_mean = tang_weights @ exp_rets
    tang_var = tang_weights @ cov @ tang_weights.T
    return tang_mean, tang_var, tang_weights

def get_best_weights(weights, means, variances, distances):
    best_mean = means[distances == distances.min()]
    best_var = variances[distances == distances.min()]
    best_weights = weights[distances == distances.min()]
    return best_mean, best_var, best_weights
def compute_projected_distance_from_optimal(std_tang_mean, std_tang_var, std_mean, std_var):
    # Squared Euclidean distance of each sampled portfolio from the optimal
    # (tangency) portfolio in mean-variance space.
    optimal = np.vstack([np.ravel(std_tang_mean), np.ravel(std_tang_var)])  # shape (2, 1)
    delta = (np.vstack([std_mean, std_var]) - optimal) ** 2
    distances = delta.sum(axis=0)
    return distances
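# Caveat: the parameter names suggest standardised inputs (see
# standardised_mean_var above), but compute_best_desirable_weights below passes
# raw means and variances, so the two axes contribute to the distance on
# different scales; standardising first would weight them equally.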
def compute_best_desirable_weights(returns, n_samples=20000):
    exp_rets = (returns.mean(axis=0) * 100).values
    cov = returns.cov().values

    # Tangency portfolio (Sharpe optimal)
    tang_mean, tang_var, tang_weights = compute_optimal_mean_var_weights(
        exp_rets,
        cov
    )

    # Monte Carlo sample of desirable (long-only, fully invested) portfolios
    weights = sample_desirable_weights(n_samples, cov.shape[0])
    means, variances = compute_mean_variances(weights, exp_rets, cov)
    distances = compute_projected_distance_from_optimal(
        tang_mean,
        tang_var,
        means,
        variances
    )

    # Desirable portfolio closest to the optimal in mean-variance space
    best_mean, best_var, best_weights = get_best_weights(weights, means, variances, distances)
    return best_mean, best_var, best_weights
def bootstrap_desirable_weights(returns, n_bootstrap_samples=1000, n_monte_carlo_draws=20000):
    monte_carlo_weights = []
    n_equities = returns.shape[1]

    for i in tqdm(range(n_bootstrap_samples)):
        re_sampled_returns = returns.sample(frac=1, replace=True)
        _, _, weights = compute_best_desirable_weights(re_sampled_returns, n_samples=n_monte_carlo_draws)
        monte_carlo_weights.append(weights)

    return pd.DataFrame(np.array(monte_carlo_weights).reshape(n_bootstrap_samples, n_equities))
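# Each bootstrap iteration resamples the return rows with replacement, re-runs
# the Monte Carlo search above on the resampled data, and keeps the winning
# weight vector. The resulting DataFrame (one row per bootstrap draw, one
# column per asset) approximates the sampling distribution of the weights.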
def sample_optimal_weights(returns, n_bootstrap_samples=1000):
    monte_carlo_weights = []
    n_equities = returns.shape[1]

    for i in tqdm(range(n_bootstrap_samples)):
        re_sampled_returns = returns.sample(frac=1, replace=True)
        exp_rets = (re_sampled_returns.mean(axis=0) * 100).values
        cov = re_sampled_returns.cov().values
        tang_mean, tang_var, tang_weights = compute_optimal_mean_var_weights(
            exp_rets,
            cov
        )
        monte_carlo_weights.append(tang_weights)

    return pd.DataFrame(np.array(monte_carlo_weights).reshape(n_bootstrap_samples, n_equities))
# Loading Data
equities = pd.read_csv("Enter your price data here.csv")
prices = equities.drop(columns='PORT')
returns = prices.pct_change().dropna()
exp_rets = returns.mean(axis=0).values * 100
cov = returns.cov().values
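# Note: the CSV filename above is a placeholder to be supplied by the user. The
# file is expected to contain one price column per asset plus a 'PORT' column
# (dropped above); returns are simple percentage changes of those prices.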
# Monte Carlo Search & Bootstrapping | |
best_weight = compute_best_desirable_weights(returns) | |
weight_distribution = bootstrap_desirable_weights(returns) | |
# Computing Markowitz Optimal and Bootstrapped Weights | |
optimal_weight = np.array([w for _,w in tangency_portfolio(cov, exp_rets).items()]).reshape((1,-1)) | |
weight_distribution_optimal = sample_optimal_weights(returns) | |
# Distribution Plot
font_title = {'family': 'roboto',
              'color': '#143d68',
              'weight': 'bold',
              'size': 16,
              'alpha': 0.7
              }

# Label the bootstrapped weight draws with the asset names before plotting
weights_df = weight_distribution
weights_df.columns = prices.columns

fig = plt.figure(figsize=(30, 15))

for idx, sector in enumerate(weights_df.columns):
    row = idx // 4
    col = idx - 4 * row
    ax = plt.subplot2grid((4, 4), (row, col))

    sns.distplot(weights_df[sector], ax=ax)

    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.set_facecolor((0, 0, 0, 0.05))

    if idx in [7, 8, 9, 10]:
        ax.set_xlabel('Weight')

    if idx in [0, 4, 8]:
        ax.set_ylabel('Frequency')

    ax.set_xlim(0, .6)
    ax.set_yticks([0])
    ax.set_xticks([0, 0.5])

    # ax.axvline(weights_df[sector].median(), c='k', alpha=0.5, linestyle='dashed')
    ax.axvline(best_weight[2][0][idx], c='r', alpha=0.5, linestyle='dashed')
    ax.set_title(sector + ' optimal: ' + str(round(best_weight[2][0][idx], 3)), loc='left', fontdict=font_title)

plt.tight_layout()
# Forest Plots
plt.figure(figsize=(30, 7))

# weights_df was already labelled in the distribution plot section above;
# label the bootstrapped Markowitz weight draws the same way
opt_weights_df = weight_distribution_optimal
opt_weights_df.columns = prices.columns

ax = plt.subplot2grid((1, 3), (0, 1))
az.plot_forest({col: weights_df[col] for col in weights_df.columns}, ax=ax, hdi_prob=0.95, linewidth=0.3, colors=['b'])
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.set_facecolor((0, 0, 0, 0.05))
ax.set_title("Extremities & Highest Density Posterior Intervals\nFor Bootstrapped Dirichlet Portfolios \n\n", loc='left', fontdict=font_title)

variances_best_weights = np.diag(weight_distribution.values @ cov @ weight_distribution.values.T)
means_best_weights = weight_distribution.values @ exp_rets
variances_opt_weights = np.diag(weight_distribution_optimal.values @ cov @ weight_distribution_optimal.values.T)
means_opt_weights = weight_distribution_optimal.values @ exp_rets
ax = plt.subplot2grid((1, 3), (0, 0))

# cmap = sns.cubehelix_palette(start=0, light=1, as_cmap=True)
# sns.kdeplot(x=variances_best_weights, y=means_best_weights, fill=True, cmap=cmap)

ax.scatter(variances_best_weights, means_best_weights, c='lightblue', alpha=0.9)
ax.scatter(variances_opt_weights, means_opt_weights, c='r', alpha=0.04)

best_mean, best_var = compute_mean_variances(best_weight[2], exp_rets, cov)
ax.scatter(best_var, best_mean)

m_opt, v_opt = compute_mean_variances(optimal_weight, exp_rets, cov)
ax.scatter(v_opt, m_opt)

ax.set_xlabel('Variance')
ax.set_ylabel('Expected Daily Returns')
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.set_facecolor((0, 0, 0, 0.05))
ax.set_title("Dirichlet Bootstrapped Portfolios Closely Approximate The \nOptimal Portfolio But With More Sensible Weights \n \n", loc='left', fontdict=font_title)
ax.legend(['Bootstrapped Dirichlet Portfolios', 'Bootstrapped Markowitz Portfolios', 'Dirichlet Portfolio', 'Markowitz Portfolio'])
ax = plt.subplot2grid((1, 3), (0, 2))
az.plot_forest({col: opt_weights_df[col] for col in opt_weights_df.columns}, ax=ax, hdi_prob=0.95, colors=['b'], linewidth=0.3)
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.spines["bottom"].set_visible(False)
ax.set_facecolor((0, 0, 0, 0.05))
ax.set_title("Extremities & Highest Density Posterior Intervals\nFor Bootstrapped Markowitz Portfolios \n\n", loc='left', fontdict=font_title)