Skip to content

Instantly share code, notes, and snippets.

@bbengfort
Last active October 19, 2015 12:58
Show Gist options
  • Save bbengfort/226a108285cdb6cbf5fe to your computer and use it in GitHub Desktop.
Save bbengfort/226a108285cdb6cbf5fe to your computer and use it in GitHub Desktop.
Computing Statistics of NBA salaries.
# nba
# Analyzes the NBA Salary to PER data set
#
# Author: Benjamin Bengfort <[email protected]>
# Created: Sat Sep 20 09:35:11 2014 -0400
#
# Copyright (C) 2014 Bengfort.com
# For license information, see LICENSE.txt
#
# ID: nba.py [] [email protected] $
"""
Analyzes the NBA Salary to PER data set
"""
##########################################################################
## Imports
##########################################################################
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Default data set location; abspath() resolves it against the working
# directory in effect when this module is first imported.
PATH = os.path.abspath('fixtures/nba_players.csv')


def read_data(path=PATH):
    """
    Reads a CSV file of NBA player data into a pandas DataFrame.

    path: location of the CSV file to read; defaults to the module-level
    PATH constant.

    Returns the DataFrame produced by pandas.read_csv.
    """
    # Honor the caller's path: previously the argument was accepted but the
    # module-level PATH was always the file that got read.
    return pd.read_csv(path)
def graph_data(path=PATH, xkey='PER', ykey='SALARY'):
    """
    Draws a scatter plot of one column of the data set against another.

    path: CSV file handed to read_data.
    xkey: name of the column plotted along the x axis.
    ykey: name of the column plotted along the y axis; its maximum also
    sets the upper y limit.

    Displays the figure with plt.show() and returns nothing.
    """
    # Captions for the default columns; any other column is labelled with
    # its own name so the axes always describe what is actually plotted.
    captions = {'PER': 'player efficiency rating', 'SALARY': 'salary'}

    data = read_data(path)
    # Use the requested columns: previously xkey/ykey were ignored and
    # 'PER'/'SALARY' were always plotted.
    xval = data[xkey]
    yval = data[ykey]

    # Start a fresh figure; the plt.* calls below act on it.
    plt.subplots()
    plt.scatter(xval, yval, alpha=0.7)
    # Leave headroom above the largest y value and a little below zero.
    plt.ylim([-10000, yval.max() + 500000])
    plt.ylabel(captions.get(ykey, ykey))
    plt.xlabel(captions.get(xkey, xkey))
    plt.title('NBA 2013 Player Efficiency Rating and Salary Correlation')
    plt.grid(True)
    plt.show()
# Script entry point: plot the default data set when run directly.
if __name__ == "__main__":
    graph_data()
# per
# Analyzes the NBA Salary to PER data set
#
# Author: Benjamin Bengfort <[email protected]>
# Created: Sat Sep 20 09:35:11 2014 -0400
#
# Copyright (C) 2014 Bengfort.com
# For license information, see LICENSE.txt
#
# ID: per.py [] [email protected] $
"""
Computes summary statistics for the nba_players.csv file
"""
##########################################################################
## Imports
##########################################################################
import csv
from collections import Counter
from operator import itemgetter
##########################################################################
## Analysis functions
##########################################################################
def load_data(path):
    """
    Lazily yields the rows of a CSV file, one dictionary per row.

    Each row is keyed by the file's header line. The SALARY field is
    converted to an int and the PER field to a float; every other field
    is left as the string read from the file.
    """
    with open(path, 'r') as handle:
        for record in csv.DictReader(handle):
            # Convert the two numeric columns in place before handing the
            # row to the caller.
            record['SALARY'] = int(record['SALARY'])
            record['PER'] = float(record['PER'])
            yield record
def statistics(path):
    """
    Computes summary statistics for the SALARY column of a players file.

    path: CSV file passed to load_data.

    Returns a dictionary with the keys:
    maximum -- the largest salary
    minimum -- the smallest salary
    median  -- the middle salary; for an even number of rows, the mean of
               the two middle salaries (a float)
    mode    -- the two most frequent salaries, as (salary, occurrences)
               pairs from Counter.most_common(2)
    mean    -- the arithmetic mean of all salaries (a float)

    Raises ValueError if the file contains no data rows.
    """
    data = sorted(load_data(path), key=itemgetter('SALARY'))
    if not data:
        # Fail with a clear message instead of an IndexError on data[-1]
        # or a ZeroDivisionError when computing the mean.
        raise ValueError("no player rows found in %s" % path)

    salaries = [row['SALARY'] for row in data]
    count = len(salaries)

    # Floor division keeps the index an int on Python 3 as well as 2.
    middle = count // 2
    if count % 2:
        median = salaries[middle]
    else:
        # An even-sized sample has no single middle element.
        median = (salaries[middle - 1] + salaries[middle]) / 2.0

    return {
        'maximum': salaries[-1],
        'minimum': salaries[0],
        'median': median,
        'mode': Counter(salaries).most_common(2),
        'mean': float(sum(salaries)) / count,
    }
# Script entry point: print the salary statistics as indented JSON.
if __name__ == '__main__':
    import json
    # print() with a single argument behaves identically on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(json.dumps(statistics('fixtures/nba_players.csv'), indent=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment