Skip to content

Instantly share code, notes, and snippets.

@gghatano
Created February 11, 2014 15:41
Show Gist options
  • Save gghatano/8937338 to your computer and use it in GitHub Desktop.
Save gghatano/8937338 to your computer and use it in GitHub Desktop.
pawapuro batting test score
library(data.table)
library(dplyr)
library(stringr)
# Load the event data and the table that holds its column headers.
dat <- fread("all2013.csv")
fields <- fread("fields.csv")
# Rename every column of `dat` with the values in the Header column of
# fields.csv (setnames() modifies `dat` by reference).
setnames(dat, fields$Header)
# Derive per-row scoring columns.
# `:=` adds each column in place; the previous `dat$col = ...` form copied the
# whole data.table once per new column.
#
# ball:      number of characters in PITCH_SEQ_TX.
# BB:        number of characters left in PITCH_SEQ_TX after removing every
#            C, S, F and X.
# hit_score: EVENT_CD - 16 when EVENT_CD is 20..23 (i.e. 4..7), otherwise 0.
# score:     hit_score + BB.
# NOTE(review): both counts include every character of the sequence string,
# so any non-pitch marker characters would be counted too -- confirm that
# this is intended.
dat[, ball := nchar(PITCH_SEQ_TX)]
dat[, BB := nchar(gsub("[CSFX]", "", PITCH_SEQ_TX))]
dat[, hit_score := ifelse(EVENT_CD >= 20 & EVENT_CD <= 23, EVENT_CD - 16, 0)]
dat[, score := hit_score + BB]
# Summarise per batter (BAT_ID): totals of ball, BB and score.
# The original chained dplyr verbs with `%.%`, an operator that dplyr has
# since removed. Aggregating directly in data.table avoids it and keeps the
# result a data.table, which the later `with =` column selection relies on.
# `keyby` returns the groups sorted by BAT_ID.
dat_pawapuro <- dat[
  ,
  list(ball = sum(ball), BB = sum(BB), score = sum(score)),
  keyby = BAT_ID
]
# p_10: score per (ball - BB), scaled by 10. Not guarded against
# ball == BB, which would yield Inf/NaN exactly as before.
dat_pawapuro[, p_10 := score / (ball - BB) * 10]
# Keep only the batters whose summed `ball` count is strictly above 500.
dat_pawapuro_over500ball <- subset(
  x = dat_pawapuro,
  subset = ball > 500
)
# Attach player names to the filtered summary.
master <- fread("Master.csv")
# Full name = first and last name separated by a single space; added in
# place with `:=` rather than copying `master`.
master[, fullname := paste(nameFirst, nameLast)]
# Lookup table: retroID -> fullname, renamed so the key matches BAT_ID.
name_id_data <- master[, c("retroID", "fullname"), with = FALSE]
setnames(name_id_data, c("BAT_ID", "name"))
# Coerce the left side to a data.table so merge() always dispatches to the
# data.table method; otherwise the `with =` selection below fails when the
# summary arrives as a plain data.frame or tibble.
dat_fullname <- merge(
  as.data.table(dat_pawapuro_over500ball),
  name_id_data,
  by = "BAT_ID"
)
# `FALSE` spelled out: `F` is an ordinary variable that can be reassigned.
dat_final <- dat_fullname[, c("name", "p_10", "score"), with = FALSE]
# Print the 20 rows with the highest p_10, in descending order.
head(
  arrange(dat_final, desc(p_10)),
  n = 20
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment