Last active
October 19, 2015 12:58
-
-
Save bbengfort/226a108285cdb6cbf5fe to your computer and use it in GitHub Desktop.
Computing Statistics of NBA salaries.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# nba | |
# Analyzes the NBA Salary to PER data set | |
# | |
# Author: Benjamin Bengfort <[email protected]> | |
# Created: Sat Sep 20 09:35:11 2014 -0400 | |
# | |
# Copyright (C) 2014 Bengfort.com | |
# For license information, see LICENSE.txt | |
# | |
# ID: nba.py [] [email protected] $ | |
""" | |
Analyzes the NBA Salary to PER data set | |
""" | |
########################################################################## | |
## Imports | |
########################################################################## | |
import os | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
# Default location of the players data set; abspath resolves it against the
# working directory in effect when this module is imported.
PATH = os.path.abspath('fixtures/nba_players.csv')


def read_data(path=PATH):
    """
    Loads a players CSV file into a pandas DataFrame.

    path: location of the CSV file to read; defaults to PATH.

    Returns the DataFrame produced by pd.read_csv.
    """
    # Read from the requested path rather than always from the module
    # default; read_csv already returns a DataFrame, so no wrapping is needed.
    return pd.read_csv(path)
def graph_data(path=PATH, xkey='PER', ykey='SALARY'):
    """
    Draws a scatter plot of one column of the players data set against
    another and displays it with plt.show().

    path: CSV file handed to read_data; defaults to PATH.
    xkey: name of the column plotted on the x axis; defaults to 'PER'.
    ykey: name of the column plotted on the y axis; defaults to 'SALARY'.

    Returns None.
    """
    data = read_data(path)

    # Honour the requested columns instead of always plotting PER vs SALARY.
    xval = data[xkey]
    yval = data[ykey]

    # Descriptive axis labels for the default columns; any other column is
    # labelled with its own key.
    labels = {'PER': 'player efficiency rating', 'SALARY': 'salary'}

    fig, axe = plt.subplots()
    axe.scatter(xval, yval, alpha=0.7)
    # Pad the y axis slightly below zero and above the largest y value so
    # points at the extremes are not drawn on the frame.
    axe.set_ylim([-10000, yval.max() + 500000])
    axe.set_ylabel(labels.get(ykey, ykey))
    axe.set_xlabel(labels.get(xkey, xkey))
    axe.set_title('NBA 2013 Player Efficiency Rating and Salary Correlation')
    axe.grid(True)
    plt.show()
# Script entry point: draw the plot using the default path and columns.
if __name__ == '__main__':
    graph_data()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# per | |
# Computes summary statistics for the nba_players.csv file
# | |
# Author: Benjamin Bengfort <[email protected]> | |
# Created: Sat Sep 20 09:35:11 2014 -0400 | |
# | |
# Copyright (C) 2014 Bengfort.com | |
# For license information, see LICENSE.txt | |
# | |
# ID: per.py [] [email protected] $ | |
""" | |
Computes summary statistics for the nba_players.csv file | |
""" | |
########################################################################## | |
## Imports | |
########################################################################## | |
import csv | |
from collections import Counter | |
from operator import itemgetter | |
########################################################################## | |
## Analysis functions | |
########################################################################## | |
def load_data(path):
    """
    Lazily reads the CSV file at path, yielding one dictionary per data row.

    Each row is keyed by the CSV header; its 'SALARY' value is converted to
    an int and its 'PER' value to a float before the row is yielded.
    """
    with open(path, 'r') as handle:
        for record in csv.DictReader(handle):
            record.update(
                SALARY=int(record['SALARY']),
                PER=float(record['PER']),
            )
            yield record
def statistics(path):
    """
    Computes summary statistics of the SALARY column of a players CSV file.

    path: CSV file handed to load_data.

    Returns a dictionary with the keys 'maximum', 'minimum', 'median',
    'mode' and 'mean'. The 'mode' entry holds the (salary, frequency) pairs
    returned by Counter.most_common(2).

    Raises an IndexError if the file contains no data rows.
    """
    rows = sorted(load_data(path), key=itemgetter('SALARY'))
    salaries = [row['SALARY'] for row in rows]
    count = len(salaries)

    return {
        'maximum': salaries[-1],
        'minimum': salaries[0],
        # Floor division keeps the index an integer on Python 3 (true
        # division would produce a float and raise TypeError). For an even
        # number of rows this selects the upper of the two middle salaries.
        'median': salaries[count // 2],
        'mode': Counter(salaries).most_common(2),
        # Start the sum at 0.0 so the mean is always a float.
        'mean': sum(salaries, 0.0) / count,
    }
# Script entry point: print the salary statistics as indented JSON.
if __name__ == '__main__':
    import json

    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(json.dumps(statistics('fixtures/nba_players.csv'), indent=4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment