Created
September 26, 2021 23:25
-
-
Save Vido/6c6d43749174ebfcb54b5986fc78ed00 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import yfinance | |
import dryscrape | |
from bs4 import BeautifulSoup | |
import numpy as np | |
import pandas as pd | |
from sklearn.decomposition import PCA | |
import matplotlib.pyplot as plt | |
def get_market_data(tickers, start, end):
    """Download daily closing prices from Yahoo Finance via ``yfinance``.

    Args:
        tickers: Ticker symbol(s), forwarded unchanged to
            ``yfinance.download``.
        start: Start of the requested window (e.g. ``'2020-09-23'``).
        end: End of the requested window.

    Returns:
        The ``'Close'`` selection of the downloaded data, with missing
        values filled backward first and then forward.
    """
    market_data = yfinance.download(
        tickers=tickers,
        start=start,
        end=end,
        interval='1d',
        # Request a sequential download. The keyword is ``threads``; the
        # previous spelling ``treads`` never reached this option.
        threads=False,
    )
    # Fills missing values. Some stocks are recent IPOs, so their early
    # history is back-filled from the first available close; any remaining
    # gaps are then forward-filled.
    return market_data['Close'].bfill().ffill()
def plot_pca_vs_ibov(log_ts, idx):
    """Plot the cumulative performance of the PCA portfolio against the IBOV.

    Args:
        log_ts: Log-return series of the PCA portfolio.
        idx: Log-return series of the index.

    The two series are placed side by side, their log-returns are
    accumulated and exponentiated, and the result is shown in a blocking
    matplotlib window.
    """
    # ``axis`` must be passed by keyword: pandas 2.x no longer accepts it
    # positionally in ``concat``.
    rs_df = pd.concat([log_ts, idx], axis=1)
    rs_df.columns = ["PCA Portfolio", "IBOV"]
    # exp(cumsum(log-returns)) is the cumulative gross return.
    crs_df = rs_df.cumsum().apply(np.exp)
    crs_df.plot()
    plt.show()
def plot_pca_vs_ibov_l10(idx, pcal10, ibovl10):
    """Plot the IBOV against the two ten-stock portfolios.

    Args:
        idx: Log-return series of the index.
        pcal10: Log-return series of the ten-stock PCA portfolio.
        ibovl10: Log-return series of the ten-stock IBOV portfolio.

    The three series are placed side by side, their log-returns are
    accumulated and exponentiated, and the result is shown in a blocking
    matplotlib window.
    """
    # ``axis`` must be passed by keyword: pandas 2.x no longer accepts it
    # positionally in ``concat``.
    rs_df = pd.concat([idx, pcal10, ibovl10], axis=1)
    rs_df.columns = ["IBOV", "PCA-L10", "IBOV-L10"]
    # exp(cumsum(log-returns)) is the cumulative gross return.
    crs_df = rs_df.cumsum().apply(np.exp)
    crs_df.plot()
    plt.show()
# Loads the Ibovespa composition from file: tickers are the index and the
# index weights live in the 'Part. (%)' column.
# NOTE: read_pickle executes pickled code; only load files you trust.
composition = pd.read_pickle('composition.df')
w_ibov = composition['Part. (%)']

# Get market data for the constituents and for the index itself
tickers = composition.index.tolist()
data_ts = get_market_data(tickers, '2020-09-23', '2021-09-23')
ibov_ts = get_market_data(['^BVSP'], '2020-09-23', '2021-09-23')

# Daily log-returns. The first row is dropped because diff() leaves it NaN.
# (No rescaling/standardisation is applied to the returns.)
log_data = data_ts.apply(np.log).diff(1).iloc[1:]
log_ibov = ibov_ts.apply(np.log).diff(1).iloc[1:]

# Plot cumulative gross returns of every constituent
acc = log_data.cumsum().apply(np.exp)
acc.plot()
plt.show()

# PCA 1: first principal component of the constituents' log-returns
pca = PCA(1).fit(log_data)
pc1 = pd.Series(index=log_data.columns, data=pca.components_[0])
# Portfolio weights: absolute loadings normalised to sum to one
abs_loadings = pc1.abs()
weights = abs_loadings / abs_loadings.sum()

# Compare Ibov vs PCA Portfolio weights
print(weights.nlargest(10))
print(weights.nsmallest(10))
print(w_ibov.nlargest(10))
print(w_ibov.nsmallest(10))

# Log-return series of the PCA-weighted portfolio (one value per date)
pca_ts = (weights * log_data).sum(axis=1)
# Equal-weighted mean of the ten stocks with the largest PC1 loadings.
# NOTE(review): selection uses the signed loadings (pc1), not the absolute
# weights used above -- confirm that this is intended.
pcal10 = log_data[pc1.nlargest(10).index].mean(axis=1)
# Ten largest Ibovespa constituents, re-normalised to sum to one
il10 = w_ibov.nlargest(10)
il10 = il10 / il10.sum()
ibovl10 = (log_data[il10.index] * il10).sum(axis=1)
# Index log-returns as a Series. log_ibov already carries the right dates;
# re-indexing it on ibov_ts.index (one row longer, because of the dropped
# first row) only introduced a leading NaN. The DataFrame branch covers the
# case where get_market_data returned a one-column frame.
idx = log_ibov.iloc[:, 0] if isinstance(log_ibov, pd.DataFrame) else log_ibov

# Plot Ibov vs PCA Portfolio
plot_pca_vs_ibov(pca_ts, idx)
plot_pca_vs_ibov_l10(idx, pcal10, ibovl10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment