Created
December 14, 2022 13:07
-
-
Save tulerpetontidae/98e942ec3bc84685eb4e197fb4658775 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Attributes along which the diversity of the group is measured.
groups = ['citizenship', 'race', 'sex', 'institute', 'religion', 'occupation', 'first names', 'eye color']

# Head counts per observed value of each attribute, in the same order as
# `groups`, e.g. citizenship [1, 1, 4] = 1 Iranian, 1 Italian, 4 Russians.
# Every inner list has to sum to `n_people`.
number_of_people = [[1, 1, 4], [1, 5], [1, 5], [1, 1, 4], [6], [1, 1, 4], [2, 1, 1, 1, 1], [2, 2, 2]]

# Total number of people
n_people = 6

# How many distinct values each attribute can take, in the same order as
# `groups`. `n_people` marks attributes where every person could differ
# (citizenship, institute, occupation, first names); the others are capped
# at 5 (race, religion), 2 (sex) and 4 (eye color) possible variants.
n_variants = [n_people, 5, 2, n_people, 5, n_people, n_people, 4]

# The diversity index is normalised by the best case scenario: the people
# spread evenly over all available variants. With k variants that is k equal
# groups of n_people / k (e.g. 2 variants for 5 people -> [2.5, 2.5]), so each
# row sums to n_people. There can never be more occupied variants than people,
# hence the min().
best_case = [
    [n_people / min(k, n_people)] * min(k, n_people)
    for k in n_variants
]
# Diversity score per attribute: the Shannon index of the observed head
# counts divided by the Shannon index of the matching best-case split.
def _shannon_entropy(counts):
    """Return the Shannon index (natural log) of `counts` out of `n_people`."""
    terms = []
    for count in counts:
        share = count / n_people
        terms.append(share * np.log(share))
    return -np.sum(terms)


shannon_index_norm = []
for observed, ideal in zip(number_of_people, best_case):
    shannon_best = _shannon_entropy(ideal)
    shannon_index_norm.append(_shannon_entropy(observed) / shannon_best)
    # Emits the normalising value for every attribute.
    # NOTE(review): assumed to belong inside the loop - the original
    # indentation was lost; confirm.
    print(shannon_best)
# One-row table: a column per attribute holding its normalised diversity score
df = pd.DataFrame([dict(zip(groups, shannon_index_norm))])

# Axis labels are the column names; N is the number of radar axes
categories = df.columns.tolist()
N = len(categories)

# Scores of the first (only) row; the first score is appended again at the
# end so that the outline of the radar polygon closes.
values = df.loc[0].values.flatten().tolist()
values = [*values, *values[:1]]

# Spread the axes evenly over the full circle and close the loop the same way
angles = [axis / float(N) * 2 * np.pi for axis in range(N)]
angles = [*angles, *angles[:1]]

plt.figure(figsize=(8, 8))
ax = plt.subplot(111, polar=True)

# One spoke per attribute, labelled with its name (closing angle excluded)
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, color='grey', size=15)

# Radial grid: five evenly spaced rings between 0 and 1, labels drawn at 0 degrees
ax.set_rlabel_position(0)
radial_ticks = np.linspace(0, 1, 5)
ax.set_yticks(radial_ticks)
ax.set_yticklabels(radial_ticks, color="grey", size=11)
ax.set_ylim(0, 1.)

# Outline of the score polygon
ax.plot(angles, values, linewidth=1, linestyle='solid')

# Translucent fill of the polygon
ax.fill(angles, values, 'b', alpha=0.1)

# Show the graph
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment