This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#data manipulation | |
import numpy as np | |
import pandas as pd | |
#network analysis
import networkx as nx | |
#plotting | |
%matplotlib inline | |
import matplotlib.pyplot as plt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the asset price history; the 'Date' column becomes the row index.
raw_asset_prices_df = pd.read_csv("asset_prices.csv", index_col='Date')

# Report the size of the dataset (rows x columns).
df_shape = raw_asset_prices_df.shape
print(f"There are {df_shape[0]} rows and {df_shape[1]} columns in the dataset")

# Report the span of the index.
# NOTE(review): the index is not parsed as dates here, so min/max compare the
# raw index values (lexically, if they were read as strings) - confirm that the
# date format sorts correctly this way.
first_date = min(raw_asset_prices_df.index)
last_date = max(raw_asset_prices_df.index)
print(f"Data timeperiod covers: {first_date} to {last_date}")

# Preview the first five rows.
raw_asset_prices_df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the lookup table holding each ETF code and its alias.
alias_table = pd.read_csv("etf_names.csv", usecols=['Code', 'ETF Alias'])

# Show the lookup table.
display(alias_table)

# Turn the table into a {code: alias} dictionary.
aliases = alias_table.set_index('Code')['ETF Alias'].to_dict()

# Relabel the price columns from ETF codes to their aliases.
raw_asset_prices_df = raw_asset_prices_df.rename(columns=aliases)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Daily log returns of every asset, computed for all columns in one
# vectorised pass instead of growing an empty dataframe column by column.
#
# Per the original note, dates are given in reverse order, so diff(-1) is used:
# each row has the following row subtracted from it, i.e.
# log(price in this row) - log(price in the next row).
# The last row has no following row and is therefore NaN.
log_returns_df = np.log(raw_asset_prices_df).diff(-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pairwise correlation between the assets' log returns, using the pandas
# built-in (Pearson is the pandas default, spelled out here for clarity).
correlation_matrix = log_returns_df.corr(method='pearson')

# Preview the first five rows of the correlation matrix.
correlation_matrix.head(5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualise the correlation matrix as a clustered heatmap.
# NOTE(review): `HTML`, `display` and `sns` are not imported in the visible
# import cell - presumably they come from IPython.display and seaborn; confirm
# they are in scope before this cell runs.
heatmap_heading = HTML("<h3>Clustered Heatmap: Correlations between asset price returns</h3>")
display(heatmap_heading)

sns.clustermap(correlation_matrix, cmap="RdYlGn")
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Flatten the correlation matrix into an edge list: one row per
# (asset_1, asset_2) pair together with their correlation.
edges = correlation_matrix.stack().reset_index()
edges.columns = ['asset_1', 'asset_2', 'correlation']

# Drop the rows that pair an asset with itself.
is_self_pair = edges['asset_1'] == edges['asset_2']
edges = edges.loc[~is_self_pair].copy()

# Preview the first five rows of the edge list.
edges.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build an undirected graph from the edge list. Each edge stores the (signed)
# correlation between its two assets in the 'correlation' attribute.
G0 = nx.from_pandas_edgelist(edges, 'asset_1', 'asset_2', edge_attr=['correlation'])

# Print a summary of the graph. nx.info() was removed in NetworkX 3.0, so the
# figures are read from the graph object directly.
# Sanity check from the original note: every node should have degree 38,
# i.e. the average degree should be 38.
node_count = G0.number_of_nodes()
edge_count = G0.number_of_edges()
# Every edge adds one to the degree of both of its endpoints.
average_degree = 2 * edge_count / node_count if node_count else 0.0
print(f"Type: {type(G0).__name__}")
print(f"Number of nodes: {node_count}")
print(f"Number of edges: {edge_count}")
print(f"Average degree: {average_degree:.4f}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Draw the full graph G0 with two different layouts on a 2x2 grid of axes.
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(20, 20))

# Drawing options shared by both plots.
draw_options = dict(
    with_labels=True,
    node_size=700,
    node_color="#e1575c",
    edge_color='#363847',
)

# (target axes, title, layout function) for each plot, in drawing order.
layout_plots = (
    (ax[0, 0], "Circular layout", nx.circular_layout),
    (ax[0, 1], "Random layout", nx.random_layout),
)
for axis, title, layout in layout_plots:
    nx.draw(G0, pos=layout(G0), ax=axis, **draw_options)
    axis.set_title(title)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 'Winner takes all' method: a minimum correlation threshold is set so that
# some edges can be removed from the diagram.
threshold = 0.5

# Build a fresh graph from the edge list, keeping the 'correlation' attribute.
Gx = nx.from_pandas_edgelist(
    edges,
    source='asset_1',
    target='asset_2',
    edge_attr=['correlation'],
)

# Collects the edges that are to be removed.
remove = list()
# loop through edges in Gx and find correlations which are below the threshold | |
for asset_1, asset_2 in Gx.edges(): |
OlderNewer