Skip to content

Instantly share code, notes, and snippets.

@gghatano
Last active August 29, 2015 13:57
Show Gist options
  • Save gghatano/9778490 to your computer and use it in GitHub Desktop.
Save gghatano/9778490 to your computer and use it in GitHub Desktop.
Visualize the records of legend batters (MLB)
playerID careerHit careerHit.x careerHit.y fullname
aaronha01 3771 3771 3771 Hank Aaron
ansonca01 3418 3418 3418 Cap Anson
biggicr01 3060 3060 3060 Craig Biggio
boggswa01 3010 3010 3010 Wade Boggs
brettge01 3154 3154 3154 George Brett
brocklo01 3023 3023 3023 Lou Brock
carewro01 3053 3053 3053 Rod Carew
clemero01 3000 3000 3000 Roberto Clemente
cobbty01 4189 4189 4189 Ty Cobb
collied01 3315 3315 3315 Eddie Collins
gwynnto01 3141 3141 3141 Tony Gwynn
henderi01 3055 3055 3055 Rickey Henderson
jeterde01 3304 3304 3304 Derek Jeter
kalinal01 3007 3007 3007 Al Kaline
lajoina01 3242 3242 3242 Nap Lajoie
mayswi01 3283 3283 3283 Willie Mays
molitpa01 3319 3319 3319 Paul Molitor
murraed02 3255 3255 3255 Eddie Murray
musiast01 3630 3630 3630 Stan Musial
palmera01 3020 3020 3020 Rafael Palmeiro
ripkeca01 3184 3184 3184 Cal Ripken
rosepe01 4256 4256 4256 Pete Rose
speaktr01 3514 3514 3514 Tris Speaker
wagneho01 3415 3415 3415 Honus Wagner
wanerpa01 3152 3152 3152 Paul Waner
winfida01 3110 3110 3110 Dave Winfield
yastrca01 3419 3419 3419 Carl Yastrzemski
yountro01 3142 3142 3142 Robin Yount
library(shiny)
library(rCharts)
library(dplyr)
library(magrittr)
library(Lahman)
library(data.table)
# Server logic for the "MLB Legend Batters" app: plots one cumulative career
# metric per season for every batter with at least 3000 career hits (plus
# Ichiro Suzuki), restricted to careers that fit inside the selected years.

# Cumulative columns produced by build_legend_career_data(). input$data must be
# one of these; anything else is rejected before it reaches the plot.
career_metrics <- c("careerHIT", "careerHR", "careerSO", "careerRBI")

# Build the per-season cumulative records of the legend batters.
#
# names_file: path to a delimited file with at least the columns `playerID`
#   and `fullname`.
# Returns a data frame with one row per player and season holding `playerID`,
#   `yearID`, the season totals (H, SO, RBI, HR), the running totals listed in
#   `career_metrics`, the first/last season of the player (`start`, `end`) and
#   `fullname`.
#
# sum()/cumsum() are deliberately left NA-propagating, as in the original
# code: a season with a missing value leaves that metric NA from then on.
build_legend_career_data <- function(names_file = "legends.csv") {
  batting <- as.data.frame(Batting)

  # Players with >= 3000 career hits.
  career_hits <- batting %>%
    dplyr::group_by(playerID) %>%
    dplyr::summarise(careerHit = sum(H)) %>%
    dplyr::filter(careerHit >= 3000)

  # Ichiro is always included. union() keeps the IDs unique, so he is not
  # counted twice when the installed data already gives him 3000+ hits
  # (a duplicated ID would duplicate his rows and double his season sums).
  legend_ids <- union(career_hits$playerID, "suzukic01")

  # One row per player and season: rows sharing a player and year (e.g. a
  # mid-season trade) are summed.
  season_totals <- batting %>%
    dplyr::filter(playerID %in% legend_ids) %>%
    dplyr::group_by(playerID, yearID) %>%
    dplyr::summarise(H = sum(H), SO = sum(SO), RBI = sum(RBI), HR = sum(HR)) %>%
    dplyr::ungroup()

  # Running totals need one output row per season, hence mutate() rather than
  # summarise(); seasons are ordered explicitly so cumsum() follows the career.
  career <- season_totals %>%
    dplyr::arrange(playerID, yearID) %>%
    dplyr::group_by(playerID) %>%
    dplyr::mutate(careerHIT = cumsum(H),
                  careerSO = cumsum(SO),
                  careerHR = cumsum(HR),
                  careerRBI = cumsum(RBI),
                  start = min(yearID),
                  end = max(yearID)) %>%
    dplyr::ungroup()

  # Display names, with Ichiro appended and duplicates (if the file already
  # lists him) removed so the join below cannot fan out.
  full_names <- as.data.frame(fread(names_file))[, c("playerID", "fullname")]
  full_names <- rbind(full_names,
                      data.frame(playerID = "suzukic01",
                                 fullname = "Ichiro Suzuki",
                                 stringsAsFactors = FALSE))
  full_names <- full_names[!duplicated(full_names$playerID), ]

  dplyr::inner_join(career, full_names, by = "playerID")
}

shinyServer(function(input, output) {
  # None of this depends on the inputs, so it is computed once per session
  # instead of on every slider move or radio-button click.
  legend_career_data <- build_legend_career_data()

  output$chart <- renderPlot({
    # input$data comes from the browser; only accept known column names.
    metric <- input$data
    if (!is.character(metric) || length(metric) != 1L ||
        !(metric %in% career_metrics)) {
      stop("Unknown metric requested: ", paste(metric, collapse = ", "),
           call. = FALSE)
    }

    # Keep only players whose whole career lies inside the selected span.
    year_span <- input$range
    visible <- legend_career_data %>%
      dplyr::filter(start >= year_span[1] & end <= year_span[2])

    # The metric is looked up by name via the .data pronoun, so the string is
    # never parsed as R code. ylab() restores the axis label that
    # aes_string() used to derive from the column name.
    gp <- ggplot2::ggplot(data = visible,
                          ggplot2::aes(x = yearID, y = .data[[metric]],
                                       color = fullname)) +
      ggplot2::geom_point(size = 4) +
      ggplot2::geom_line(size = 1) +
      ggplot2::ggtitle(metric) +
      ggplot2::xlab("year") +
      ggplot2::ylab(metric) +
      ggplot2::theme(
        plot.title = ggplot2::element_text(size = 24, face = "bold")
      )
    print(gp)
  })
})
library(shiny)
# UI for the "MLB Legend Batters" app: a sidebar to choose the career metric
# and the span of years, and a main panel showing the resulting plot.

# Slider bounds come from the installed batting data rather than fixed years.
# The server only shows players whose first season is >= the lower bound and
# whose last season is <= the upper bound, so bounds narrower than the data
# would make some players impossible to display.
season_span <- range(Lahman::Batting$yearID, na.rm = TRUE)

shinyUI(pageWithSidebar(
  # Application title
  headerPanel("MLB Legend Batters (over 3000 hit)"),

  # Sidebar: metric selector and year-range slider
  sidebarPanel(
    # The values are the column names the server plots (read as input$data).
    radioButtons("data", "Data:",
                 list("Hit" = "careerHIT",
                      "Homerun" = "careerHR",
                      "Strike Out" = "careerSO",
                      "RBI" = "careerRBI")),
    br(),
    # Two-handle slider read by the server as input$range = c(from, to).
    # sep = "" prints years as 1900 rather than 1,900.
    sliderInput("range", "Time Span (year):",
                min = season_span[1], max = season_span[2],
                value = c(max(1900, season_span[1]), season_span[2]),
                step = 1, sep = "")
  ),

  # Main panel: the cumulative-metric plot rendered by the server
  mainPanel(
    plotOutput("chart")
  )
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment