This entry is intended to demonsrate how easy it is to use:
-
Stattleship’s API: http://playbook.stattleship.com/
-
the R package https://github.com/stattleship/stattleship-r
-
along with http://www.neo4j.org/ as a database,
to performance, rank teams, strength of schedule very quickly.
R is installed, the R package StattleshipR
packge is installed, and optionally, a Neo4j server running locally if you would rather develop on your machine. In that case, you will also need to have the RNeo4j
package installed as well. Both packages should be installed from github
using the devtools
library, and the function install_github
.
Lastly, I set Neo4j credentials to neo4j as user=neo4j
and password=password
. And after signing up for a Stattleship Token, I added it to my local R Environment variable using a .Renviron
file in my local drive ~
with a value of STATTLE_TOKEN=MYTOKENHERE
.
// constraints
CREATE CONSTRAINT ON (n:Team) ASSERT n.id IS UNIQUE;
CREATE CONSTRAINT ON (n:Game) ASSERT n.id IS UNIQUE;
// import the teams
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/1N1ojcEY9syOvUI2EQ8ked54NGJB8Xmo9V-Rb5j0b7ZM/pub?gid=1724989853&single=true&output=csv" AS row
MERGE (t:Team {id:row.id, nickname:row.nickname, slug:row.slug});
// import the games
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/1ZYBARl3UL6DL5OIQX43hcize7xyMf2HuzmskU10Hy2U/pub?gid=211648136&single=true&output=csv" AS row
WITH row
MERGE (g:Game {id:row.id,
duration:toInt(row.duration),
attendance:toInt(row.attendance),
score_diff:toInt(row.score_differential),
scoreline:row.scoreline} );
// connect the teams and games -- assumes everything is created already, otherwise use merge
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/1ZYBARl3UL6DL5OIQX43hcize7xyMf2HuzmskU10Hy2U/pub?gid=211648136&single=true&output=csv" AS row
WITH row
MATCH (g:Game {id:row.id})
MATCH (a:Team {id:row.away_team_id})
MATCH (h:Team {id:row.home_team_id})
WITH row, g, a, h
MERGE (a) -[:AWAY_TEAM {goals:toInt(row.away_team_score),
outcome:row.away_team_outcome,
points: CASE WHEN row.away_team_outcome='win' THEN 2
WHEN row.away_team_outcome='overtime_loss' THEN 1
ELSE 0
END}]-> (g)
MERGE (h) -[:HOME_TEAM {goals:toInt(row.home_team_score),
outcome:row.home_team_outcome,
points: CASE WHEN row.home_team_outcome='win' THEN 2
WHEN row.home_team_outcome='overtime_loss' THEN 1
ELSE 0
END}]-> (g);
The R code below connects to the API, grabs the data, and what we need into the database. One note: I am using cypher to import the data by to load csv files with the LOAD CSV
command. In the R code, I commented out a section of the code that I used locally to import the data into a local Neo4j database.
options(stringsAsFactors = FALSE) ## packages library(stattleshipR) library(dplyr) library(lubridate) library(stringr) ## set the token set_token(Sys.getenv("STATTLE_TOKEN")) ## helper function parse_stattle <- function(stattle_list, entry) { x <- do.call("rbind", lapply(stattle_list, function(x) x[[entry]])) stopifnot(is.data.frame(x)) return(x) } ## get the API request for games games_ss <- ss_get_result(ep="games", query=list(status="ended"), walk = TRUE) ## parse out the games and keep the columns we want games <- parse_stattle(games_ss, "games") games <- select(games, id, scoreline, home_team_id, away_team_id, winning_team_id, attendance, duration, home_team_score, away_team_score, score_differential, home_team_outcome, away_team_outcome) ## parse out the teams from the games API teams <- parse_stattle(games_ss, "home_teams") ## just need to get once teams <- unique(teams) %>% select(id, location, name, nickname, slug) ## write the datafiles for neo import #save.image(file="data/session.Rdata") #write.table(games, file="data/games.csv", sep=",", row.names=F, na="") #write.table(teams, file="data/teams.csv", sep=",", row.names=F, na="") #===================================================================== ## the files above are what I put onto google drive and read publicly ## the code below is what I used to develop locally #===================================================================== ## connect to a running neo4j server -- for local development # library(RNeo4j) # graph <- startGraph("http://localhost:7474/db/data/", # username = "neo4j", # password = "password") # # ## helper function to import cql files against neo4j import shell tool # ## a function to import cypher statements into the shell # build_import <- function(NEO_SHELL="~/neo4j-community-2.3.1/bin/neo4j-shell", # cypher_file) { # cmd = sprintf("%s -file %s", NEO_SHELL, cypher_file) # system(cmd) # } # # # ## clear the ENTIRE graph database # clear(graph, input=FALSE) # # ## import the constraints # build_import(cypher_file="cql/constraints.cql") # # ## import the database # build_import(cypher_file="cql/build-db.cql")