bbengfort · October 19, 2015 12:58
diff --git a/nba.py b/nba.py
 # nba
 # Analyzes the NBA Salary to PER data set
 #
 # Author:   Benjamin Bengfort <[email protected]>
 # Created:  Sat Sep 20 09:35:11 2014 -0400
 #
 # Copyright (C) 2014 Bengfort.com
 # For license information, see LICENSE.txt
 #
 # ID: nba.py [] [email protected] $

 """
 Analyzes the NBA Salary to PER data set
 """

 ##########################################################################
 ## Imports
 ##########################################################################

 import os
 import pandas as pd
 import numpy as np

 import matplotlib.pyplot as plt

 # Default location of the data set; abspath resolves it against the
 # working directory at import time.
 PATH = os.path.abspath('fixtures/nba_players.csv')

 def read_data(path=PATH):
    """
    Reads the CSV file at ``path`` and returns it as a pandas DataFrame.

    path: location of the CSV file to load (defaults to PATH)
    """
    # Read the file the caller asked for rather than always reading PATH;
    # pd.read_csv already returns a DataFrame so no re-wrapping is needed.
    return pd.read_csv(path)

 def graph_data(path=PATH, xkey='PER', ykey='SALARY'):
    """
    Draws a scatter plot of one column of the data set against another
    and displays it with plt.show().

    path: CSV file loaded through read_data (defaults to PATH)
    xkey: name of the column plotted on the x-axis (defaults to 'PER')
    ykey: name of the column plotted on the y-axis (defaults to 'SALARY')
    """
    data = read_data(path)
    xval = data[xkey]
    yval = data[ykey]

    # Readable axis labels for the default columns; any other column is
    # labelled with its own name.
    labels = {'PER': 'player efficiency rating', 'SALARY': 'salary'}

    plt.subplots()
    plt.scatter(xval, yval, alpha=0.7)
    # Pad the y-axis slightly below zero and above the largest plotted value.
    plt.ylim([-10000, yval.max()+500000])

    plt.ylabel(labels.get(ykey, ykey))
    plt.xlabel(labels.get(xkey, xkey))
    plt.title('NBA 2013 Player Efficiency Rating and Salary Correlation')

    plt.grid(True)
    plt.show()

 # Script entry point: plot the default data set with the default columns.
 if __name__ == "__main__":
    graph_data()
diff --git a/per.py b/per.py
 # per
 # Computes summary statistics for the nba_players.csv file
 #
 # Author:   Benjamin Bengfort <[email protected]>
 # Created:  Sat Sep 20 09:35:11 2014 -0400
 #
 # Copyright (C) 2014 Bengfort.com
 # For license information, see LICENSE.txt
 #
 # ID: per.py [] [email protected] $

 """
 Computes summary statistics for the nba_players.csv file
 """

 ##########################################################################
 ## Imports
 ##########################################################################

 import csv

 from collections import Counter
 from operator import itemgetter

 ##########################################################################
 ## Analysis functions
 ##########################################################################

 def load_data(path):
    """
    Generator that yields each row of the CSV file at ``path`` as a
    dictionary keyed by the header names, with the SALARY value converted
    to an int and the PER value converted to a float.
    """
    with open(path, 'r') as handle:
        for record in csv.DictReader(handle):
            record.update(
                SALARY=int(record['SALARY']),
                PER=float(record['PER']),
            )
            yield record

 def statistics(path):
    """
    Computes summary statistics for the SALARY column of the CSV at ``path``.

    Returns a dictionary with the keys:
        maximum - the largest salary
        minimum - the smallest salary
        median  - the salary at index count // 2 of the sorted rows (the
                  upper of the two middle values when the count is even)
        mode    - up to two (salary, occurrences) pairs, most common first
        mean    - the arithmetic mean of the salaries as a float

    Raises an IndexError if the file contains no data rows.
    """
    data  = sorted(load_data(path), key=itemgetter('SALARY'))
    count = len(data)
    total = float(sum(row['SALARY'] for row in data))
    freqs = Counter(row['SALARY'] for row in data)

    stats = {
        'maximum': data[-1]['SALARY'],
        'minimum': data[0]['SALARY'],
        # Floor division keeps the index an integer on Python 3, where
        # count / 2 is a float and cannot be used to index a list.
        'median': data[count // 2]['SALARY'],
        'mode': freqs.most_common(2),
        'mean': total / count,
    }

    return stats

 # Script entry point: print the salary statistics as indented JSON.
 if __name__ == '__main__':
    import json
    # Calling print with parentheses emits the same text on Python 2 and
    # is valid syntax on Python 3, unlike the print statement.
    print(json.dumps(statistics('fixtures/nba_players.csv'), indent=4))
	# nba
	# Analyzes the NBA Salary to PER data set
	#
	# Author: Benjamin Bengfort <[email protected]>
	# Created: Sat Sep 20 09:35:11 2014 -0400
	#
	# Copyright (C) 2014 Bengfort.com
	# For license information, see LICENSE.txt
	#
	# ID: nba.py [] [email protected] $

	"""
	Analyzes the NBA Salary to PER data set
	"""

	##########################################################################
	## Imports
	##########################################################################

	import os
	import pandas as pd
	import numpy as np

	import matplotlib.pyplot as plt

	# Default location of the data set; abspath resolves it against the
	# working directory at import time.
	PATH = os.path.abspath('fixtures/nba_players.csv')

	def read_data(path=PATH):
		"""
		Reads the CSV file at ``path`` and returns it as a pandas DataFrame.

		path: location of the CSV file to load (defaults to PATH)
		"""
		# Read the file the caller asked for rather than always reading PATH;
		# pd.read_csv already returns a DataFrame so no re-wrapping is needed.
		return pd.read_csv(path)

	def graph_data(path=PATH, xkey='PER', ykey='SALARY'):
		"""
		Draws a scatter plot of one column of the data set against another
		and displays it with plt.show().

		path: CSV file loaded through read_data (defaults to PATH)
		xkey: name of the column plotted on the x-axis (defaults to 'PER')
		ykey: name of the column plotted on the y-axis (defaults to 'SALARY')
		"""
		data = read_data(path)
		xval = data[xkey]
		yval = data[ykey]

		# Readable axis labels for the default columns; any other column is
		# labelled with its own name.
		labels = {'PER': 'player efficiency rating', 'SALARY': 'salary'}

		plt.subplots()
		plt.scatter(xval, yval, alpha=0.7)
		# Pad the y-axis slightly below zero and above the largest plotted value.
		plt.ylim([-10000, yval.max()+500000])

		plt.ylabel(labels.get(ykey, ykey))
		plt.xlabel(labels.get(xkey, xkey))
		plt.title('NBA 2013 Player Efficiency Rating and Salary Correlation')

		plt.grid(True)
		plt.show()

	# Script entry point: plot the default data set with the default columns.
	if __name__ == '__main__':
		graph_data()
	# per
	# Computes summary statistics for the nba_players.csv file
	#
	# Author: Benjamin Bengfort <[email protected]>
	# Created: Sat Sep 20 09:35:11 2014 -0400
	#
	# Copyright (C) 2014 Bengfort.com
	# For license information, see LICENSE.txt
	#
	# ID: per.py [] [email protected] $

	"""
	Computes summary statistics for the nba_players.csv file
	"""

	##########################################################################
	## Imports
	##########################################################################

	import csv

	from collections import Counter
	from operator import itemgetter

	##########################################################################
	## Analysis functions
	##########################################################################

	def load_data(path):
		"""
		Generator that yields each row of the CSV file at ``path`` as a
		dictionary keyed by the header names, with the SALARY value converted
		to an int and the PER value converted to a float.
		"""
		with open(path, 'r') as data:
			reader = csv.DictReader(data)
			for row in reader:
				row['SALARY'] = int(row['SALARY'])
				row['PER'] = float(row['PER'])
				yield row

	def statistics(path):
		"""
		Computes summary statistics for the SALARY column of the CSV at ``path``.

		Returns a dictionary with the keys:
			maximum - the largest salary
			minimum - the smallest salary
			median  - the salary at index count // 2 of the sorted rows (the
			          upper of the two middle values when the count is even)
			mode    - up to two (salary, occurrences) pairs, most common first
			mean    - the arithmetic mean of the salaries as a float

		Raises an IndexError if the file contains no data rows.
		"""
		data = sorted(load_data(path), key=itemgetter('SALARY'))
		count = len(data)
		total = float(sum(row['SALARY'] for row in data))
		freqs = Counter(row['SALARY'] for row in data)

		stats = {
			'maximum': data[-1]['SALARY'],
			'minimum': data[0]['SALARY'],
			# Floor division keeps the index an integer on Python 3, where
			# count / 2 is a float and cannot be used to index a list.
			'median': data[count // 2]['SALARY'],
			'mode': freqs.most_common(2),
			'mean': total / count,
		}

		return stats

	# Script entry point: print the salary statistics as indented JSON.
	if __name__ == '__main__':
		import json
		# Calling print with parentheses emits the same text on Python 2 and
		# is valid syntax on Python 3, unlike the print statement.
		print(json.dumps(statistics('fixtures/nba_players.csv'), indent=4))