Last active
January 6, 2017 13:14
-
-
Save benjaminfspector/947d6b331f9eb22e2ab6215e329db306 to your computer and use it in GitHub Desktop.
Gini Coefficient Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Takes a folder as its argument. Searches for all files (must be .hlt!) in that folder, | |
calculates their Gini coefficients, and then creates a json file ('ReplayGinis.json') | |
which contains the replay names and their associated Gini coefficients. | |
""" | |
import sys, json, numpy, os | |
def gini_coefficient(productions):
    """Return the Gini coefficient of a grid (or flat list) of production values.

    The values are flattened, sorted ascending and accumulated; the
    coefficient is ``1 - 2 * B`` where ``B`` is the area under the Lorenz
    curve through ``(0, 0), (1/n, P_1/total), ..., (1, 1)``.

    Returns 0.0 when every value is zero (all sites are equal).
    Raises ValueError when there are no values at all.
    """
    values = numpy.sort(numpy.asarray(productions, dtype=float).ravel())
    if values.size == 0:
        raise ValueError('cannot compute a Gini coefficient without production values')
    cumulative = numpy.cumsum(values)
    count, total = values.size, cumulative[-1]
    if total == 0:
        return 0.0
    # Trapezoid rule with unit spacing over the cumulative sums, written out
    # so it does not depend on numpy.trapz (deprecated in NumPy 2).
    area = cumulative.sum() - (cumulative[0] + cumulative[-1]) / 2
    # The first trapezoid, from (0, 0) to the first cumulative value, adds
    # cumulative[0] / 2 to the area, so the term is SUBTRACTED here; adding it
    # makes a perfectly equal map come out as 2/n instead of 0.
    return float((count * total - 2 * area - cumulative[0]) / count / total)


def main():
    """Write 'ReplayGinis.json' mapping each .hlt replay in sys.argv[1] to its Gini."""
    folder = sys.argv[1]
    ginis = {}
    # Sorted for a deterministic output order; non-.hlt entries (including a
    # previously written ReplayGinis.json) are skipped.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.hlt'):
            continue
        with open(os.path.join(folder, filename), 'r') as replay:
            productions = json.load(replay)['productions']
        val = gini_coefficient(productions)
        print('Finished analyzing ' + filename + ': Gini = ' + str(val))
        ginis[filename] = val
    # json.dump produces valid JSON (double-quoted keys, no trailing comma),
    # which the plotting scripts read back with json.loads.
    with open('ReplayGinis.json', 'w') as out:
        json.dump(ginis, out)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Takes the location of the json file produced by GetGinis.py as its argument. | |
Graphs the frequency of every Gini coefficient (rounded to nearest 0.01) | |
as a proportion of the whole. | |
""" | |
import sys, json | |
import matplotlib.pyplot as plt | |
def gini_frequencies(ginis, low=12, high=61):
    """Return {bucket: share of replays} for Gini values rounded to 0.01.

    ``ginis`` maps replay names to Gini coefficients (numbers or numeric
    strings). Buckets are integer hundredths (0.27 -> 27). The result always
    covers ``low..high`` and is widened to include any value outside that
    span, so unexpected coefficients are counted instead of raising KeyError.
    Shares are relative to ``len(ginis)``; an empty mapping yields all zeros.
    """
    counts = {}
    for value in ginis.values():
        bucket = round(100 * float(value))
        counts[bucket] = counts.get(bucket, 0) + 1
    if counts:
        low, high = min(low, min(counts)), max(high, max(counts))
    total = len(ginis)
    return {
        bucket: (counts.get(bucket, 0) / total if total else 0.0)
        for bucket in range(low, high + 1)
    }


def main():
    """Plot the Gini frequency distribution from the JSON file in sys.argv[1]."""
    with open(sys.argv[1], 'r') as source:
        freqs = gini_frequencies(json.load(source))
    buckets = sorted(freqs)
    plt.plot([b / 100. for b in buckets], [freqs[b] for b in buckets])
    plt.xlabel('Gini Coefficient')
    plt.ylabel('Frequency')
    # Same fixed viewport as before; data outside it is counted but not shown.
    plt.axis([0.1, 0.65, 0, 0.09])
    plt.xticks([0.1+0.05*x for x in range(0, 12)])
    plt.show()


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Takes the location of the json file produced by GetGinis.py as its argument. | |
Goes back through all the files with a Gini coefficient in a specific range, | |
and graphs the average Lorenz curve for all of them. | |
""" | |
import sys, json, numpy | |
import matplotlib.pyplot as plt | |
def lorenz_curve(productions, samples=101):
    """Return the Lorenz curve of the production values at evenly spaced points.

    The values are flattened and sorted ascending; the curve passes through
    ``(i / n, sum of the i smallest values / total)`` for ``i = 0..n`` and is
    linearly interpolated at ``samples`` points covering [0, 1] inclusive.

    An all-zero grid yields the line of equality. Raises ValueError when
    there are no values at all.
    """
    values = numpy.sort(numpy.asarray(productions, dtype=float).ravel())
    if values.size == 0:
        raise ValueError('cannot build a Lorenz curve without production values')
    xs = numpy.linspace(0.0, 1.0, samples)
    # Prepend 0 so the curve starts at the origin.
    cumulative = numpy.concatenate(([0.0], numpy.cumsum(values)))
    total = cumulative[-1]
    if total == 0:
        return xs
    # The cumulative sums already ARE the (unnormalised) Lorenz curve; they
    # only need dividing by the total, not integrating a second time.
    sites = numpy.linspace(0.0, 1.0, cumulative.size)
    return numpy.interp(xs, sites, cumulative / total)


def in_gini_range(value, low=25, high=30):
    """Return True when the Gini value, rounded to hundredths, is in low..high."""
    return low <= round(100 * float(value)) <= high


def main():
    """Plot the mean Lorenz curve of the replays whose Gini is 0.25-0.30.

    sys.argv[1] is the JSON file of replay names and Gini coefficients.
    An optional sys.argv[2] names the folder holding the replays
    (default: 'replays').
    """
    with open(sys.argv[1], 'r') as source:
        ginis = json.load(source)
    replay_dir = sys.argv[2] if len(sys.argv) > 2 else 'replays'
    curve_sum, counter = numpy.zeros(101), 0
    for name in ginis:
        if not in_gini_range(ginis[name]):
            continue
        with open(replay_dir + '/' + name, 'r') as replay:
            productions = json.load(replay)['productions']
        curve_sum += lorenz_curve(productions, samples=curve_sum.size)
        counter += 1
        print('Finished analyzing ' + name)
    if counter == 0:
        # Avoid dividing by zero when nothing matched the selected band.
        sys.exit('No replays with a Gini coefficient between 0.25 and 0.30 were found.')
    plt.plot(numpy.linspace(0.0, 1.0, curve_sum.size), curve_sum / counter)
    plt.xlabel('Fraction of Sites')
    plt.ylabel('Cumulative Fraction of Total Production')
    plt.axis([0, 1, 0, 1])
    plt.xticks([0.1*x for x in range(0, 11)])
    plt.show()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment