Last active
January 3, 2016 13:59
-
-
Save gghatano/8473615 to your computer and use it in GitHub Desktop.
# Relation between pitching rhythm and assist score
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(pitchRx)
library(chron)
library(doBy)
library(plyr)
library(ggplot2)
# Load the 2013 pitch-by-pitch data from a local CSV export.
# The commented-out call shows an alternative that fetches the data with
# scrapeFX() instead of reading the file.
# data2013 <- scrapeFX(start = "2013-02-01", end = "2013-10-01")
data2013 <- read.csv("2013.csv")
# data2013 <- subset(data2013, sv_id != "NA")

# add game_id
# The id is the text that follows "gid_" in the URL path component
# ".../gid_<id>/...". It is located by pattern instead of by fixed character
# offsets (70-95), so a different URL prefix no longer yields a garbage id.
# Rows whose URL has no such component get NA.
# NOTE(review): assumes `url` holds Gameday-style URLs -- confirm against the CSV.
gid_pattern <- "^.*/gid_([^/]+).*$"
data2013$game_id <- ifelse(
  grepl(gid_pattern, data2013$url),
  sub(gid_pattern, "\\1", data2013$url),
  NA_character_
)
# add pitching time
#
# Append a `time` column ("HH:MM:SS") derived from characters 8-13 of `sv_id`.
#
# game_data: data frame with an `sv_id` column whose characters 8-13 are the
#            six digits HHMMSS (e.g. "130401_201530" -> "20:15:30").
# Returns `game_data` with an extra character column `time`. Rows whose
# `sv_id` is missing or does not carry six digits at that position get NA
# rather than a malformed string such as "NA:NA:NA".
make_time <- function(game_data) {
  stopifnot("sv_id" %in% names(game_data))

  # as.character() so that a factor column is sliced by label, not by code
  hhmmss <- substr(as.character(game_data$sv_id), 8, 13)
  is_valid <- !is.na(hhmmss) & grepl("^[0-9]{6}$", hhmmss)

  clock <- rep(NA_character_, length(hhmmss))
  clock[is_valid] <- paste(
    substr(hhmmss[is_valid], 1, 2),
    substr(hhmmss[is_valid], 3, 4),
    substr(hhmmss[is_valid], 5, 6),
    sep = ":"
  )

  game_data$time <- clock
  game_data
}
# get the diff of pitching time
#
# Summarise the gaps between consecutive pitch times.
#
# time:        vector of "HH:MM:SS" clock times, in the order the pitches
#              appear in the data (the vector is not re-sorted here).
# max_gap_sec: gaps whose absolute value is this many seconds or more are
#              discarded before summarising (default 120, the previously
#              hard-coded cutoff).
# Returns an unnamed numeric vector of length 3:
#   [1] mean gap in seconds, [2] variance of the gaps, [3] number of pitches.
# The vector is deliberately left unnamed: downstream code reads the
# resulting summary columns as time.FUN1 / time.FUN2 / time.FUN3.
pitching_rhythm <- function(time, max_gap_sec = 120) {
  pitches <- length(time)

  # times() values are scaled by 24 * 60 * 60 below to express gaps in seconds
  gap_sec <- as.numeric(diff(times(time))) * 24 * 60 * 60

  # keep only short gaps; long pauses (presumably inning breaks or other
  # stoppages) would otherwise dominate the mean and variance
  gap_sec <- gap_sec[abs(gap_sec) < max_gap_sec]

  c(mean(gap_sec, na.rm = TRUE), var(gap_sec, na.rm = TRUE), pitches)
}
# make pitching_tempo data
# One row per pitcher_name x game_id x top_inning combination.
# pitching_rhythm() returns three values per group; later code reads them as
# the columns time.FUN1 (mean gap), time.FUN2 (variance) and time.FUN3
# (number of pitches).
tempo_data <- summaryBy(time ~ pitcher_name + game_id + top_inning,
                        data = make_time(data2013),
                        FUN = pitching_rhythm,
                        keep.names = TRUE)
# Largest non-missing value of `x` (used as the run total of a game).
#
# x: numeric vector, possibly containing NA.
# Returns max(x) ignoring NA. When no non-missing value exists the result is
# -Inf -- the same sentinel max(x, na.rm = TRUE) produces -- but without the
# "no non-missing arguments to max" warning being raised for every such group.
get_runs <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    return(-Inf)
  }
  max(observed)
}
# make the game score
# Per game_id, take the largest home_team_runs / away_team_runs value seen
# in the pitch log (via get_runs).
game_score <- summaryBy(home_team_runs + away_team_runs ~ game_id,
                        data = data2013,
                        FUN = get_runs,
                        keep.names = TRUE)

# remove the NAs and INF
# Both run columns must be finite; checking only the home column would let a
# missing away score flow into `assist` below.
game_score <- subset(game_score,
                     is.finite(home_team_runs) & is.finite(away_team_runs))

# join the dataframe
# Left join: every tempo row is kept; games without a usable score get NA runs.
tempo_score_data <- join(tempo_data, game_score, by = "game_id", type = "left")

# add the assist score
# Rows with top_inning == "Y" take home_team_runs, all others away_team_runs
# (presumably because the home team's pitcher is on the mound in the top half,
# so these are the runs scored by the pitcher's own team).
tempo_score_data$assist <- with(
  tempo_score_data,
  ifelse(top_inning == "Y", home_team_runs, away_team_runs)
)
# over 50 pitches
# time.FUN3 is the third summary value (number of pitches in the group).
tempo_score_data_over50 <- subset(tempo_score_data, time.FUN3 > 50)

# personal data
japanese_pitchers <- c("Yu Darvish", "Hisashi Iwakuma", "Hiroki Kuroda")
darvish_data <- subset(tempo_score_data_over50, pitcher_name == "Yu Darvish")
kuroda_data <- subset(tempo_score_data_over50, pitcher_name == "Hiroki Kuroda")
iwakuma_data <- subset(tempo_score_data_over50, pitcher_name == "Hisashi Iwakuma")
japanese_pitchers_data <- subset(tempo_score_data_over50,
                                 pitcher_name %in% japanese_pitchers)

# Previous object names, kept only so existing references keep working.
japs <- japanese_pitchers
japs_data <- japanese_pitchers_data
# histogram (mean) divided by assist score
# Distribution of the mean tempo (time.FUN1), one facet per assist score,
# restricted to assist scores of 8 or less.
tempo_score_data_over50_under8score <- subset(tempo_score_data_over50, assist <= 8)

gghist <- ggplot(data = tempo_score_data_over50_under8score, aes(x = time.FUN1)) +
  geom_histogram(fill = "white", colour = "black") +
  facet_wrap(~assist, nrow = 10) +
  xlab("mean tempo") +
  ggtitle("mean tempo histogram") +
  theme(
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    strip.text.x = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 20, face = "bold")
  )

# print() explicitly so the plot is also drawn when the script is source()d
print(gghist)
# mean vs assist score (Kuroda)
# Scatter plot of mean tempo (time.FUN1) against assist score for the rows
# in kuroda_data.
p <- ggplot(data = kuroda_data, aes(x = time.FUN1, y = assist)) +
  geom_point(size = 3) +
  xlab("mean (sec)") +
  ggtitle("mean tempo (sec) vs assist score (Kuroda)") +
  theme(
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.title.y = element_text(size = 16, face = "bold"),
    plot.title = element_text(size = 16, face = "bold")
  )

# print() explicitly so the plot is also drawn when the script is source()d
print(p)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment