Skip to content

Instantly share code, notes, and snippets.

@gghatano
Last active January 3, 2016 13:59
Show Gist options
  • Save gghatano/8473615 to your computer and use it in GitHub Desktop.
Save gghatano/8473615 to your computer and use it in GitHub Desktop.
relation between pitching rhythm and assist score
library(pitchRx)
library(chron)
library(doBy)
library(plyr)
library(ggplot2)
# Load the 2013 pitch data from a local CSV file.
# Disabled alternative kept from the original script (scrape with pitchRx,
# then drop rows without an sv_id):
#   data2013 <- scrapeFX(start = "2013-02-01", end = "2013-10-01")
#   data2013 <- subset(data2013, sv_id != "NA")
data2013 <- read.csv(file = "2013.csv")
# Add game_id: characters 70-95 of the url column.
# NOTE(review): the fixed positions presumably cover the game identifier
# inside the URL -- confirm against the actual url values.
data2013[["game_id"]] <- substr(data2013$url, start = 70, stop = 95)
# Add the pitching time as a "HH:MM:SS" string.
#
# Args:
#   game_data: data frame with an `sv_id` column. Characters 8-13 of each
#     sv_id are read as HHMMSS.
#     NOTE(review): presumably sv_id looks like "YYMMDD_HHMMSS" -- confirm.
#
# Returns:
#   `game_data` with an additional character column `time`. A missing sv_id
#   yields the string "NA:NA:NA", because paste() renders NA as text.
make_time <- function(game_data) {
  # The argument used to default to `game_data = game_data`, a self-referential
  # default that R cannot evaluate; the argument is simply required now.
  hhmmss <- substr(game_data$sv_id, 8, 13)
  game_data$time <- paste(
    substr(hhmmss, 1, 2),
    substr(hhmmss, 3, 4),
    substr(hhmmss, 5, 6),
    sep = ":"
  )
  game_data
}
# Summarise the intervals between consecutive pitching times.
#
# Args:
#   time: vector of clock times accepted by chron's times(), e.g. "HH:MM:SS"
#     strings, one per pitch. Intervals are taken between neighbouring
#     elements, so the order of `time` matters.
#   max_gap_sec: intervals whose absolute length is not below this many
#     seconds are discarded before summarising. Defaults to 120, the value
#     that used to be hard-coded.
#
# Returns:
#   Unnamed numeric vector of length 3:
#     [1] mean of the kept intervals in seconds,
#     [2] variance of the kept intervals,
#     [3] number of elements in `time` (the pitch count).
#   The vector is deliberately left unnamed: the rest of the script addresses
#   the summaryBy() output columns as time.FUN1 / time.FUN3.
pitching_rhythm <- function(time, max_gap_sec = 120) {
  n_pitches <- length(time)
  # times() values are fractions of a day, so the differences are scaled by
  # 24 * 60 * 60 to obtain seconds.
  gaps_sec <- as.numeric(diff(times(time))) * 24 * 60 * 60
  # abs() means large negative jumps are dropped as well as long pauses.
  gaps_sec <- gaps_sec[abs(gaps_sec) < max_gap_sec]
  c(mean(gaps_sec, na.rm = TRUE), var(gaps_sec, na.rm = TRUE), n_pitches)
}
# Build the pitching tempo table: pitching_rhythm() is applied to the `time`
# column for every pitcher_name / game_id / top_inning combination. Later
# code reads the resulting columns time.FUN1 (mean tempo) and time.FUN3
# (pitch count).
# NOTE(review): pitching_rhythm() diffs neighbouring rows, so this assumes the
# rows of data2013 are already in chronological order within each group --
# confirm.
tempo_data <- summaryBy(
  time ~ pitcher_name + game_id + top_inning,
  data = make_time(data2013),
  FUN = pitching_rhythm,
  keep.names = TRUE
)
# Largest non-missing value of `x`.
#
# Args:
#   x: vector of run counts, possibly containing NA.
#
# Returns:
#   The maximum of the non-missing values, or -Inf when there are none.
get_runs <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    # max(x, na.rm = TRUE) also gives -Inf here, but raises a warning for
    # every such group; return the same sentinel quietly instead.
    return(-Inf)
  }
  max(observed)
}
# Make the game score: for every game_id take the largest home_team_runs and
# away_team_runs value found in the pitch data.
# NOTE(review): presumably these columns hold the running score, so the
# maximum is the final score -- confirm.
game_score <- summaryBy(
  home_team_runs + away_team_runs ~ game_id,
  data = data2013,
  FUN = get_runs,
  keep.names = TRUE
)
# Remove the NAs and Inf: get_runs() yields -Inf for a game without any
# recorded value. Both columns are checked, because the assist score is taken
# from either one; testing only home_team_runs would let an away score of
# -Inf through.
game_score <- subset(
  game_score,
  is.finite(home_team_runs) & is.finite(away_team_runs)
)
# Attach the per-game score to every tempo row (left join on game_id, which
# is plyr::join's default type).
tempo_score_data <- join(tempo_data, game_score, by = "game_id", type = "left")
# Add the assist score: home_team_runs where top_inning is "Y", otherwise
# away_team_runs.
# NOTE(review): presumably top_inning == "Y" means the pitcher belongs to the
# home team, making this the run support from his own side -- confirm.
tempo_score_data$assist <- ifelse(
  tempo_score_data$top_inning == "Y",
  tempo_score_data$home_team_runs,
  tempo_score_data$away_team_runs
)
# Over 50 pitches: time.FUN3 is the third value returned by pitching_rhythm(),
# i.e. the pitch count of the group.
tempo_score_data_over50 <- subset(tempo_score_data, time.FUN3 > 50)
# Personal data: one subset per pitcher plus one covering all three.
# The name vector and the combined subset were renamed; the previous
# identifiers used an offensive abbreviation and were not referenced anywhere
# else in this script.
japanese_pitchers <- c("Yu Darvish", "Hisashi Iwakuma", "Hiroki Kuroda")
darvish_data <- subset(tempo_score_data_over50, pitcher_name == "Yu Darvish")
kuroda_data <- subset(tempo_score_data_over50, pitcher_name == "Hiroki Kuroda")
iwakuma_data <- subset(
  tempo_score_data_over50,
  pitcher_name == "Hisashi Iwakuma"
)
japanese_pitchers_data <- subset(
  tempo_score_data_over50,
  pitcher_name %in% japanese_pitchers
)
# Histogram of the mean tempo, divided into one panel per assist score.
# Only rows with an assist score of at most 8 are plotted.
tempo_score_data_over50_under8score <- subset(
  tempo_score_data_over50,
  assist <= 8
)
gghist <- ggplot(
  data = tempo_score_data_over50_under8score,
  aes(x = time.FUN1)
) +
  geom_histogram(fill = "white", colour = "black") +
  facet_wrap(~assist, nrow = 10) +
  xlab("mean tempo") +
  ggtitle("mean tempo histogram") +
  theme(
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    strip.text.x = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 20, face = "bold")
  )
gghist
# Scatter plot for Kuroda: mean tempo (time.FUN1) on the x axis against the
# assist score on the y axis.
p <- ggplot(data = kuroda_data, aes(x = time.FUN1, y = assist)) +
  geom_point(size = 3) +
  xlab("mean (sec)") +
  ggtitle("mean tempo (sec) vs assist score (Kuroda)") +
  theme(
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 16, face = "bold")
  )
p
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment