Skip to content

Instantly share code, notes, and snippets.

@benjaminfspector
Last active January 6, 2017 13:14
Show Gist options
  • Save benjaminfspector/947d6b331f9eb22e2ab6215e329db306 to your computer and use it in GitHub Desktop.
Save benjaminfspector/947d6b331f9eb22e2ab6215e329db306 to your computer and use it in GitHub Desktop.
Gini Coefficient Code
"""
Takes a folder as its argument. Reads every file in that folder (nothing is
filtered, so the folder must contain only .hlt replay files), calculates each
replay's Gini coefficient, and then creates a JSON file ('ReplayGinis.json')
that maps each replay name to its Gini coefficient.
"""
import sys, json, numpy, os
def gini_coefficient(productions):
    """Return the Gini coefficient of a (possibly nested) sequence of values.

    The values are flattened, sorted in ascending order and accumulated into
    a Lorenz curve. The coefficient is one minus twice the trapezoidal area
    under that curve, with the curve anchored at the origin. Identical values
    give 0.0; a single non-zero value among n gives (n - 1) / n.

    Raises:
        ValueError: if there are no values, or if they sum to zero (the
            coefficient is undefined in both cases).
    """
    cumulative = numpy.cumsum(numpy.sort(numpy.asarray(productions).ravel()))
    count = len(cumulative)
    if count == 0:
        raise ValueError('no production values to analyze')
    total = cumulative[-1]
    if total == 0:
        raise ValueError('total production is zero')
    # With C the cumulative sums and T = C[-1], the area under the Lorenz
    # curve through (0, 0) is (2 * sum(C) - T) / (2 * n * T).  The earlier
    # expression added C[0] where it has to be subtracted, which reported
    # 2 / n**2 instead of 0 for identical values.  Summing directly also
    # avoids numpy.trapz, which newer NumPy releases deprecate.
    numerator = count * total + total - 2 * numpy.sum(cumulative)
    return float(numerator / (count * total))


def load_productions(path):
    """Return the 'productions' entry of the JSON replay stored at *path*."""
    with open(path, 'r') as replay:
        return json.load(replay)['productions']


def write_replay_ginis(folder, out_path='ReplayGinis.json'):
    """Compute the Gini coefficient of every replay in *folder*.

    Every directory entry is read as a JSON replay. The resulting mapping
    of file name to coefficient is written to *out_path* as valid JSON
    (double-quoted keys, no trailing comma) and also returned.
    """
    ginis = {}
    for filename in os.listdir(folder):
        productions = load_productions(os.path.join(folder, filename))
        try:
            val = gini_coefficient(productions)
        except ValueError as error:
            # An undefined coefficient would otherwise end up as 'nan' in the
            # output, which json.loads cannot read back.
            print('Skipping ' + filename + ': ' + str(error))
            continue
        print('Finished analyzing ' + filename + ': Gini = ' + str(val))
        ginis[filename] = val
    # json.dump handles quoting and separators; the with-block closes (and
    # therefore flushes) the file even if serialization fails.
    with open(out_path, 'w') as out:
        json.dump(ginis, out)
    return ginis


if __name__ == '__main__':
    write_replay_ginis(sys.argv[1])
"""
Takes the location of the json file produced by GetGinis.py as its argument.
Graphs the frequency of every Gini coefficient (rounded to nearest 0.01)
as a proportion of the whole.
"""
import sys, json
import matplotlib.pyplot as plt
def gini_histogram(ginis, low=12, high=61):
    """Bucket Gini coefficients to the nearest 0.01 and return proportions.

    Args:
        ginis: mapping of replay name to Gini coefficient (number or numeric
            string).
        low, high: inclusive bucket range, in hundredths, that is always
            present in the result.

    Returns:
        A dict mapping each bucket (coefficient * 100, rounded) to the
        fraction of replays that fall in it, in ascending bucket order. The
        range is widened when a coefficient lies outside low..high, so such
        values are counted rather than raising KeyError. Every fraction is
        0.0 when *ginis* is empty.
    """
    buckets = [round(100 * float(value)) for value in ginis.values()]
    first = min([low] + buckets)
    last = max([high] + buckets)
    counts = {bucket: 0 for bucket in range(first, last + 1)}
    for bucket in buckets:
        counts[bucket] += 1
    # Normalize by the number of replays actually loaded instead of a
    # constant tied to one particular data set.
    total = len(buckets)
    if total == 0:
        return {bucket: 0.0 for bucket in counts}
    return {bucket: count / total for bucket, count in counts.items()}


def plot_gini_frequencies(json_path):
    """Plot the proportion of replays at each rounded Gini coefficient."""
    with open(json_path, 'r') as handle:
        ginis = json.load(handle)
    frequencies = gini_histogram(ginis)
    plt.plot([bucket / 100. for bucket in frequencies],
             list(frequencies.values()))
    plt.xlabel('Gini Coefficient')
    plt.ylabel('Frequency')
    plt.axis([0.1, 0.65, 0, 0.09])
    plt.xticks([0.1 + 0.05 * x for x in range(0, 12)])
    plt.show()


if __name__ == '__main__':
    plot_gini_frequencies(sys.argv[1])
"""
Takes the location of the json file produced by GetGinis.py as its argument.
Goes back through all the files with a Gini coefficient in a specific range,
and graphs the average Lorenz curve for all of them.
"""
import sys, json, numpy
import matplotlib.pyplot as plt
def cumulative_area_profile(productions):
    """Return a 100-slot profile of running area for one replay.

    The values are flattened, sorted ascending and accumulated. Walking the
    cumulative sums, a trapezoidal running area is kept (starting from 0);
    each time the site index crosses into a new percent bucket, the running
    area divided by the sum of all cumulative values is added to the bucket
    being left. Slots that no index maps to stay 0.0.

    NOTE(review): the recorded quantity is a normalized running area, not
    cumulative[i] / cumulative[-1]; confirm this is what the plot labelled
    'Cumulative Fraction of Total Production' is meant to show.
    """
    cumulative = numpy.cumsum(numpy.sort(numpy.asarray(productions).ravel()))
    profile = [0.0] * 100
    size = len(cumulative)
    total = numpy.sum(cumulative)
    area, previous = 0, 0
    for index, current in enumerate(cumulative):
        area += (current + previous) / 2
        previous = current
        bucket = round(100 * index / size)
        if bucket != round(100 * (index + 1) / size):
            profile[bucket] += area / total
    return profile


def plot_average_lorenz(json_path, replay_dir='replays', low=25, high=30):
    """Plot the average profile of all replays in a Gini coefficient band.

    Args:
        json_path: JSON file mapping replay name to Gini coefficient.
        replay_dir: directory the replay names are resolved against.
        low, high: inclusive band, in hundredths, of rounded coefficients to
            include.

    Prints a message and returns without plotting when no replay falls in
    the band, instead of dividing by zero.
    """
    with open(json_path, 'r') as handle:
        ginis = json.load(handle)
    summed, counter = [0.0] * 100, 0
    for name in ginis:
        if low <= round(100 * float(ginis[name])) <= high:
            counter += 1
            # The with-block closes each replay as soon as it is parsed.
            with open(replay_dir + '/' + name, 'r') as replay:
                productions = json.load(replay)['productions']
            profile = cumulative_area_profile(productions)
            summed = [acc + part for acc, part in zip(summed, profile)]
            print('Finished analyzing ' + name)
    if counter == 0:
        print('No replays with a Gini coefficient between '
              + str(low / 100.) + ' and ' + str(high / 100.))
        return
    averaged = [value / counter for value in summed]
    plt.plot([x / 100. for x in range(100)], averaged)
    plt.xlabel('Fraction of Sites')
    plt.ylabel('Cumulative Fraction of Total Production')
    plt.axis([0, 1, 0, 1])
    plt.xticks([0.1 * x for x in range(0, 11)])
    plt.show()


if __name__ == '__main__':
    plot_average_lorenz(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment