julian-west’s gists

julian-west / stock-correlations-imports.py

Last active September 7, 2019 16:46

	#data manipulation
	import numpy as np
	import pandas as pd

	#network analysis
	import networkx as nx

	#plotting
	%matplotlib inline
	import matplotlib.pyplot as plt

julian-west / stock-correlations-load-data.py

Created September 7, 2019 16:48

	# load the asset price table from disk, using the 'Date' column as the row index
	raw_asset_prices_df = pd.read_csv("asset_prices.csv", index_col='Date')

	# report the dimensions of the dataset (rows x columns)
	df_shape = raw_asset_prices_df.shape
	n_rows, n_cols = df_shape
	print(f"There are {n_rows} rows and {n_cols} columns in the dataset")

	# report the smallest and largest values found in the date index
	date_index = raw_asset_prices_df.index
	first_date = min(date_index)
	last_date = max(date_index)
	print(f"Data timeperiod covers: {first_date} to {last_date}")

	# preview the first five rows
	raw_asset_prices_df.head(5)

julian-west / aliases.py

Created September 7, 2019 16:53

	# load the ETF code -> alias lookup table, keeping only the two columns needed
	aliases = pd.read_csv("etf_names.csv", usecols=['Code', 'ETF Alias'])

	# preview the lookup table
	display(aliases)

	# collapse the table into a {code: alias} dictionary
	aliases = aliases.set_index('Code')['ETF Alias'].to_dict()

	# relabel the price columns, replacing ETF codes with their aliases
	raw_asset_prices_df = raw_asset_prices_df.rename(columns=aliases)

julian-west / stock-correlations-log_returns.py

Created September 7, 2019 16:54

	# calculate the daily log returns of every asset in a single vectorised pass:
	# take the natural log of each price, then difference consecutive rows.
	# dates are given in reverse order so the diff period is -1, i.e. each row has
	# the row that follows it subtracted from it (the last row is therefore NaN).
	log_returns_df = np.log(raw_asset_prices_df).diff(-1)

julian-west / stock-correlations-calc_correlations.py

Created September 7, 2019 16:55

	#pairwise correlation between the log-return columns, using pandas' built-in
	#corr (Pearson is the pandas default, spelled out here for clarity)
	correlation_matrix = log_returns_df.corr(method='pearson')

	#preview the first five rows of the correlation matrix
	correlation_matrix.head(5)

julian-west / stock-correlations-heatmap.py

Created September 7, 2019 16:56

	#render an HTML heading, then draw the correlation matrix as a clustered heatmap
	#NOTE(review): `sns`, `display` and `HTML` are not imported in the snippets shown
	#here - confirm seaborn and IPython.display are imported elsewhere in the notebook
	display(
	    HTML("<h3>Clustered Heatmap: Correlations between asset price returns</h3>")
	)
	sns.clustermap(
	    data=correlation_matrix,
	    cmap="RdYlGn",
	)
	plt.show()

julian-west / stock-correlations-edge_list.py

Created September 7, 2019 16:58

	#flatten the square correlation matrix into one row per (row label, column label) pair
	edges = correlation_matrix.stack()
	edges = edges.reset_index()

	#give the three resulting columns meaningful names
	edges.columns = ['asset_1', 'asset_2', 'correlation']

	#keep only pairs of two different assets (drops each asset's correlation with itself)
	is_distinct_pair = edges['asset_1'] != edges['asset_2']
	edges = edges[is_distinct_pair].copy()

	#preview the first 5 rows of the edge list dataframe
	edges.head(5)

julian-west / stock-correlations-edge_list2py

Created September 7, 2019 16:59

	#create undirected graph with weights corresponding to the correlation magnitude
	G0 = nx.from_pandas_edgelist(edges, 'asset_1', 'asset_2', edge_attr=['correlation'])

	#print out the graph info
	#nx.info() was removed in NetworkX 3.0, so the summary is built from the graph's
	#own node/edge counts, which are available in every NetworkX version
	#check number of nodes and degrees are as expected (all should have degree = 38, i.e. average degree = 38)
	node_count = G0.number_of_nodes()
	edge_count = G0.number_of_edges()
	#every edge adds one to the degree of each of its two endpoints;
	#guard against an empty graph to avoid dividing by zero
	average_degree = 2 * edge_count / node_count if node_count else 0.0
	print(f"Type: {type(G0).__name__}")
	print(f"Number of nodes: {node_count}")
	print(f"Number of edges: {edge_count}")
	print(f"Average degree: {average_degree:8.4f}")

julian-west / stock-correlations-visualise_network.py

Created September 7, 2019 17:00

	#2x2 grid of axes; only the top row is drawn on in this snippet
	fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(20, 20))

	#drawing options shared by both plots
	draw_options = dict(
	    with_labels=True,
	    node_size=700,
	    node_color="#e1575c",
	    edge_color='#363847',
	)

	#top-left: node positions from the circular layout
	nx.draw(G0, pos=nx.circular_layout(G0), ax=ax[0, 0], **draw_options)
	ax[0, 0].set_title("Circular layout")

	#top-right: node positions from the random layout
	nx.draw(G0, pos=nx.random_layout(G0), ax=ax[0, 1], **draw_options)
	ax[0, 1].set_title("Random layout")

julian-west / stock-correlations-threshold.py

Created September 7, 2019 17:01

	# 'winner takes all' method - set minimum correlation threshold to remove some edges from the diagram
	threshold = 0.5

	# create a new graph from edge list
	Gx = nx.from_pandas_edgelist(edges, 'asset_1', 'asset_2', edge_attr=['correlation'])

	# list to store edges to remove
	remove = []
	# loop through edges in Gx and find correlations which are below the threshold
	for asset_1, asset_2 in Gx.edges():