# Created December 19, 2013 17:30
# Source: GitHub gist bchartoff/8043070.
# SDQL and plotly API scripts to generate sports score heatmaps.
# Note from the gist page: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the
# file in an editor that reveals hidden Unicode characters.
import ast
import random
import re
import urllib
import urlparse

import numpy
# Placeholder sportsdatabase.com API key; SDB.query sends it as the `api_key`
# parameter of every request, so replace it with a real key before running.
API_KEY = "SDQL_KEY"
# Used by SDB.box (its `jittery` option) to nudge the plotted y values.
def jiggler(lst, jiggle):
    """Return a new list with uniform random noise added to each value of *lst*.

    Every value gets its own offset drawn from ``[-jiggle / 2, jiggle / 2)``.
    The offset is centred on zero so that jittering does not shift the data
    as a whole (the previous ``- 0.6`` constant biased every value downwards
    by ``0.1 * jiggle`` on average).

    :param lst: iterable of numbers.
    :param jiggle: total width of the noise band; ``0`` leaves values unchanged.
    :returns: list of jittered numbers, same length and order as *lst*.
    """
    return [x + jiggle * (random.random() - 0.5) for x in lst]
class SDB:
    """Client for one sportsdatabase.com SDQL endpoint plus plotly chart helpers.

    An instance targets one sport and, optionally, one table of that sport.
    The chart methods run a query and pass the resulting traces to the
    module-level ``plot`` callable, which this file binds in its ``__main__``
    section; calling them before ``plot`` exists raises ``NameError``.

    NOTE(review): every chart method forwards all of its keyword arguments to
    ``query`` (so plot-only options end up in the request URL) and reuses the
    same dict as the plotly trace. That is the original behaviour and is kept.
    """

    def __init__(self, sport, table=''):
        """Store the sport (lower-cased) and optional table used to build URLs."""
        self.sport = sport.lower()
        # query() interpolates the table into the URL path, so it must be a string.
        self.table = table or ''

    @staticmethod
    def _parse_callback(text):
        """Convert the server's ``json_callback(...)`` payload into Python data.

        The payload is parsed with ``ast.literal_eval`` instead of ``exec`` so
        that a response can only ever produce literals, never run code.

        :param text: raw response body, e.g. ``json_callback({...});``.
        :returns: the decoded object (a dict for a normal query result).
        :raises ValueError, SyntaxError: if the payload is not a plain literal.
        """
        # Same textual rewrite as before: drop the callback name, map null -> None.
        literal = text.replace('json_callback', '').replace('null', 'None')
        literal = literal.strip().rstrip(';').strip()
        return ast.literal_eval(literal)

    def query(self, sdql, **kwargs):
        """Run an SDQL query and return the decoded result dictionary.

        For the sdql ``'points@_i<10'`` the result looks like::

            {'headers': ['points'], 'groups':
                [{'sdql': '_i < 10',
                  'columns': [[105, 91, 100, 101, 112, 106, 106, 111, 112, 94]]}]}

        :param sdql: the SDQL query text.
        :param kwargs: extra URL parameters; ``sdql``, ``output`` and
            ``api_key`` are always overwritten, ``reduce`` defaults to ``"1"``.
        :returns: the decoded response (see ``_parse_callback``).
        """
        # Server parameters.
        kwargs["sdql"] = sdql
        kwargs["output"] = "json"
        kwargs.setdefault("reduce", "1")
        kwargs["api_key"] = API_KEY

        # Build the URL: <sport>/[<table>_]query.json?<params>
        page = self.sport + "/%s%squery.json" % (self.table, self.table and '_')
        qs = '&'.join("%s=%s" % (urllib.quote_plus(str(k)), urllib.quote_plus(str(v)))
                      for (k, v) in kwargs.items())
        url = urlparse.urlunparse(['http', 'sportsdatabase.com', page, '', qs, ''])

        response = urllib.urlopen(url)
        try:
            payload = response.read()  # a string holding a json callback
        finally:
            response.close()
        return self._parse_callback(payload)

    def set_layout(self, res, **kwargs):
        """Fill in default axis titles on a plotly layout from a query result.

        :param res: query result; ``res["headers"]`` supplies the titles.
        :param kwargs: may carry ``layout``, a dict that is updated in place;
            titles already present in it are left alone.
        :returns: the layout dict, with ``xaxis``/``yaxis`` titles set.
        """
        layout = kwargs.get('layout', {})
        headers = res["headers"]
        layout.setdefault("xaxis", {}).setdefault('title', headers[0])
        # With a single header there is no y column; "Count" is a guess at the label.
        ytitle = "Count" if len(headers) == 1 else headers[1]
        layout.setdefault("yaxis", {}).setdefault('title', ytitle)
        return layout

    def histogram(self, sdql, **kwargs):
        """Query *sdql* and plot the first column as a histogram with bin size 1.

        NOTE(review): reads ``res["columns"]`` at the top level, whereas the
        sample in ``query`` nests columns inside ``groups`` - confirm the
        response shape for the default ``reduce=1``.
        """
        res = self.query(sdql, **kwargs)
        layout = self.set_layout(res, **kwargs)
        kwargs['x'] = res["columns"][0]
        kwargs['type'] = 'histogramx'
        kwargs['xbins'] = {'size': 1}
        return plot([kwargs], layout=layout)

    def scatter(self, sdql, **kwargs):
        """Query *sdql* and plot column 0 against column 1 as markers."""
        res = self.query(sdql, **kwargs)
        layout = self.set_layout(res, **kwargs)
        kwargs['x'] = res["columns"][0]
        kwargs['y'] = res["columns"][1]
        kwargs['mode'] = 'markers'
        return plot([kwargs], layout=layout)

    @staticmethod
    def _heat_grid(xs, ys, values, xmin, xmax, ymin, ymax):
        """Build the dense heatmap grid for the inclusive x/y integer ranges.

        :returns: a list with one row per x in ``xmin..xmax``; each row has one
            entry per y in ``ymin..ymax`` holding the value recorded for that
            ``(x, y)`` pair, or ``None`` where the data has no such pair.
        """
        lookup = dict(zip(zip(xs, ys), values))
        return [[lookup.get((i, j)) for j in range(ymin, ymax + 1)]
                for i in range(xmin, xmax + 1)]

    def heat(self, sdql, **kwargs):
        """Query *sdql* and plot column 2 as a heatmap over columns 0 and 1.

        Columns 0 and 1 are converted to ints and used as grid coordinates.
        ``xmin``/``xmax``/``ymin``/``ymax`` in *kwargs* override the data
        extent; the same bounds drive both the bin edges and the z grid so the
        two always agree. The third header becomes the chart title.
        """
        res = self.query(sdql, **kwargs)
        columns = res["columns"]
        layout = self.set_layout(res, **kwargs)
        layout['title'] = res["headers"][2]

        xs = [int(v) for v in columns[0]]
        ys = [int(v) for v in columns[1]]
        xmax = kwargs.get('xmax', max(xs))
        xmin = kwargs.get('xmin', min(xs))
        ymax = kwargs.get('ymax', max(ys))
        ymin = kwargs.get('ymin', min(ys))

        # Update rather than replace, so titles and caller-supplied axis
        # settings survive.
        for axis in ('xaxis', 'yaxis'):
            layout.setdefault(axis, {}).update({'showgrid': False, 'zeroline': False})

        # Bin edges sit half a unit either side of each integer value.
        kwargs['x'] = [edge - 0.5 for edge in range(xmin, xmax + 2)]
        kwargs['y'] = [edge - 0.5 for edge in range(ymin, ymax + 2)]
        # NOTE(review): rows of z follow column 0 (the x values) - confirm this
        # is the orientation plotly expects for a heatmap trace.
        kwargs['z'] = self._heat_grid(xs, ys, columns[2], xmin, xmax, ymin, ymax)
        kwargs['type'] = 'heatmap'
        return plot([kwargs], layout=layout)

    def box(self, sdql, **kwargs):
        """Query *sdql* ungrouped-reduced (``reduce=0``) and build one box per group.

        Groups are ordered by descending mean of their first column.

        Recognised keyword options (all optional): ``jittery`` (noise width
        passed to ``jiggler``), ``yoff`` (constant added to every value),
        ``xname`` (box label; otherwise taken from the group's sdql),
        ``marker_color``, ``marker_size``, ``jitter``, ``pointpos``,
        ``box_fill_color``, ``line_color``, ``line_width``, ``boxpoints``,
        ``ymin``, ``ymax``, ``autorange``, ``title``, ``start``, ``stop``
        (slice of boxes to keep) and ``plot``.

        :returns: the result of ``plot(...)``, or ``(data, layout)`` when
            ``plot`` is passed as a false value.
        """
        kwargs["reduce"] = '0'
        res = self.query(sdql, **kwargs)

        groups = res['groups']
        # float() keeps this a true mean under Python 2 integer division.
        groups.sort(key=lambda g: float(sum(g['columns'][0])) / len(g['columns'][0]))
        groups.reverse()

        highlight = {'Lin': 'rgba(201, 0, 22, 0.5)'}  # per-name marker colour overrides
        marker_size = kwargs.get('marker_size', 4)
        data = []
        for group in groups:
            values = group['columns'][0]
            if kwargs.get("jittery"):
                values = jiggler(values, kwargs["jittery"])
            if kwargs.get("yoff"):
                values = [y + kwargs["yoff"] for y in values]

            if 'xname' in kwargs:
                name = kwargs['xname']
            else:
                # Label from the right-hand side of the first "field = value" clause.
                first_clause = group['sdql'].split(" and ")[0]
                found = re.findall(r"[a-z]+[\s]*=[\s]*(.*)", first_clause, re.I)
                name = found[0] if found else first_clause
            words = name.split()
            name = words[-1].strip() if words else ''  # keep only the last word

            marker_color = highlight.get(
                name, kwargs.get('marker_color', 'rgba(100, 100, 100, .5)'))
            data.append({
                'y': values,
                'type': 'box',
                'jitter': kwargs.get('jitter', 0.85),
                # relative position of the 'jittered' points w.r.t. the box
                'pointpos': kwargs.get('pointpos', 0),
                # default fill is fully transparent
                'fillcolor': kwargs.get('box_fill_color', 'rgba(255, 255, 255, 0)'),
                'line': {'color': kwargs.get('line_color', 'rgb(201, 0, 22)'),
                         'width': kwargs.get('line_width', 1)},
                'marker': {'color': marker_color, 'size': marker_size,
                           'line': {'color': marker_color, 'width': 1}},
                'boxpoints': kwargs.get('boxpoints', 'all'),
                'name': name,
            })

        # Only scan the data for bounds the caller did not supply.
        if 'ymax' in kwargs:
            ymax = kwargs['ymax']
        else:
            ymax = int(max(max(trace['y']) for trace in data) + 1)
        if 'ymin' in kwargs:
            ymin = kwargs['ymin']
        else:
            ymin = int(min(min(trace['y']) for trace in data) - 0.5)

        font_color = 'rgb(0, 0, 0)'
        if kwargs.get('start') and kwargs.get('stop'):
            font_color = 'rgb(201, 0, 22)'

        invisible = 'rgba(255, 255, 255, 0)'  # hides axis lines and ticks
        layout = {
            'title': kwargs.get('title', groups[0]['sdql']),
            'xaxis': {'showgrid': False, 'zeroline': False, 'tickangle': 90,
                      'showticklabels': True,
                      'linecolor': invisible,
                      'tickcolor': invisible},
            'yaxis': {'showgrid': False, 'zeroline': False, 'gridcolor': 'white',
                      'linecolor': invisible,
                      'tickcolor': invisible,
                      "autorange": kwargs.get("autorange", True) or False,
                      "range": [ymin, ymax],
                      "type": "linear",
                      'title': res['headers'][0]},
            'paper_bgcolor': 'rgb(255,255,255)',
            'plot_bgcolor': 'rgb(255,255,255)',
            'font': {'size': 10, 'color': font_color},
            'showlegend': False,
        }

        if kwargs.get("stop"):
            data = data[:kwargs['stop']]
        if kwargs.get("start"):
            data = data[kwargs['start']:]

        if kwargs.get("plot", 1):
            return plot(data, layout=layout)
        return data, layout
# Ready-made clients, one per sport's default endpoint.
nba = SDB("nba")
ncaafb = SDB("ncaafb")
ncaabb = SDB("ncaabb")
nfl = SDB("nfl")
cfl = SDB("cfl")
nhl = SDB("nhl")
mlb = SDB("mlb")
# Clients bound to a specific table of a sport (queried at "<table>_query.json").
passing = SDB("nfl", table="passing")
nbap = SDB("nba", table='player')
def scoring_heat():
    """Plot one score-vs-opponent-score heatmap each for NHL, MLB, NFL and NBA.

    Every query asks for three unnamed columns: the team's score, the
    opponent's score and ``S(1)``, which ``SDB.heat`` uses as the cell value.
    """
    charts = [
        (nhl, "R(goals) as '',R(o:goals) as '',S(1) as ''@goals and o:goals and goals is not None and Conference not in team and Team not in team"),
        (mlb, "R(runs) as '',R(o:runs) as '',S(1) as ''@runs and o:runs and runs is not None"),
        (nfl, "R(points) as '',R(o:points) as '',S(1) as ''@points and o:points and points is not None"),
        (nba, "R(points) as '',R(o:points) as '',S(1) as ''@points and o:points and points is not None"),
    ]
    for client, sdql in charts:
        client.heat(sdql)
def lin():
    """Plot Jeremy Lin's 2012 per-game stats as paired box plots.

    For every stat two boxes are built through ``nbap.box(..., plot=0)``: the
    regular season (grey) followed by the playoffs (red). All boxes are then
    sent to ``plot`` in a single call, using the layout returned by the last
    ``box`` call.
    """
    title = "Jeremy Lin NBA Stats for 2012<BR><BR>regular season and <font color=C90016>playoffs</font>"
    stats = ["minutes", "field goals made as FGs", "field goals attempted as 'FGAs'",
             "three pointers made as Threes", "three pointers attempted as '3As'",
             "free throws made as FTs", "free throws attempted as 'FTAs'",
             "rebounds", "assists", "blocks", "steals", "fouls", "turnovers", "points"]

    # Options common to both series.
    shared = dict(title=title, plot=0, pointpos=1.75, jitter=0.3, ymin=-0.5, ymax=50,
                  jittery=0, line_width=1, marker_size=3)
    # (value of the `playoffs` filter, colours for that series)
    series = [
        (0, dict(marker_color="rgba(170,170,170,0.3)",
                 line_color="rgba(100,100,100,0.5)",
                 box_fill_color="rgba(200,200,200,0.5)")),
        (1, dict(marker_color="rgba(201,0,22,0.3)",
                 line_color="rgba(201,0,22,0.3)",
                 box_fill_color="rgba(201,0,22,0.05)")),
    ]

    stack = []
    layout = None
    for stat in stats:
        print("stat: %s" % stat)
        # "<expression> as <label>" supplies its own label; otherwise title-case the stat.
        parts = stat.split(' as ')
        if len(parts) == 2:
            name = parts[-1].replace("'", '')
        else:
            name = stat.title()
        for playoffs, colours in series:
            sdql = "%s as ''@name=Jeremy Lin and season=2012 and minutes>0 and playoffs=%d" % (stat, playoffs)
            options = dict(shared, xname=name)
            options.update(colours)
            data, layout = nbap.box(sdql, **options)
            stack += data
    plot(stack, layout=layout)
def lin_in_top(all_stats=False):
    """Plot per-player box charts for the 2012 NBA season, top 100 boxes each.

    By default only the four headline charts (points, assists, steals and a
    fantasy-points formula) are drawn, exactly as before. The remaining charts
    used to sit behind a bare ``return`` and could never run; pass
    ``all_stats=True`` to draw them as well.

    :param all_stats: also draw the twelve secondary stat charts.
    """
    stop = 100
    title = "Jeremy Lin Among Top %d NBA Players in 2012" % stop

    # (SDQL expression, column label, extra box() options)
    headline = [
        ("points", "Points", {}),
        ("assists", "Assists", {}),
        ("steals", "Steals", {}),
        ("5*three pointers made+2*assists+5*blocks+0.5*points+5*steals+rebounds",
         "Fantasy Points", {}),
    ]
    secondary = [
        ("minutes", "Minutes", {}),
        ("field goals made", "Field Goals", {}),
        ("field goals attempted", "Field Goals Attempted", {}),
        ("three pointers made", "3s", {}),
        ("three pointers attempted", "3s Attempted", {}),
        ("free throws made", "Free Throws", {}),
        ("free throws attempted", "Free Throw Attempted", {}),
        ("fouls", "Fouls", {'ymax': 6.5}),
        ("turnovers", "Turnovers", {}),
        ("rebounds", "Rebounds", {}),
        ("offensive rebounds", "Offensive Rebounds", {}),
        ("blocks", "Blocks", {}),
    ]

    charts = headline + secondary if all_stats else headline
    for expression, label, extra in charts:
        sdql = "%s as '%s'@name and season=2012 and minutes>0" % (expression, label)
        nbap.box(sdql, title=title, ymin=-0.5, stop=stop, **extra)
def test_box():
    """Smoke-test the plotly connection with two tiny hard-coded box traces.

    Relies on the module-level ``plot`` callable bound in the ``__main__``
    section.

    :returns: whatever ``plot`` returns.
    """
    box1 = {'y': [0, 1, 2, 4],
            'type': 'box'}
    box2 = {'y': [1, 2, 4, 5, 8],
            'type': 'box'}
    return plot([box1, box2])
def test_scatter():
    """Smoke-test the plotly connection with two small x/y series.

    The series are passed positionally as ``plot(x0, y0, x1, y1)``. Relies on
    the module-level ``plot`` callable bound in the ``__main__`` section.

    :returns: whatever ``plot`` returns.
    """
    x0 = [1, 2, 3, 4]
    y0 = [10, 15, 13, 17]
    x1 = [2, 3, 4, 5]
    y1 = [16, 5, 11, 9]
    return plot(x0, y0, x1, y1)
if __name__ == "__main__":
    import plotly

    # Bind the module-level `plot` callable that every chart helper above calls.
    # The username and key are placeholders; substitute real plotly credentials.
    plot = plotly.plotly(username='username', key='PLOTLY_KEY').plot

    # Alternative entry points, left disabled:
    #nfl.histogram("S(third down conversions)@week and team and season")
    #lin()
    #test_box()
    #test_scatter()
    scoring_heat()
# (GitHub page footer) Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.