Skip to content

Instantly share code, notes, and snippets.

@yonicd
Last active October 3, 2017 02:11
Show Gist options
  • Select an option

  • Save yonicd/a05c4d85f7884c517a6cfa6aff24f59a to your computer and use it in GitHub Desktop.

Select an option

Save yonicd/a05c4d85f7884c517a6cfa6aff24f59a to your computer and use it in GitHub Desktop.
Function that scrapes GitHub to combine the traffic of several repositories into a single plot
library(RSelenium)
library(XML)
library(ggplot2)
library(reshape2)
library(plyr)
library(dplyr)
# Placeholders: replace each value before running the script.
# GitHub login name and password used to sign in through the browser.
gh_user <- "<your github login name>"
gh_pass <- "<your github login password>"
# Account/team that owns the repositories, i.e. github.com/<gh_team>/<repo>.
gh_team <- "<team associated with account>"
# Repository name(s) under `gh_team`; github_traffic() visits each element.
repos <- "<repositories in team>"
# Failsafe: if the function fails and leaves the Selenium port open, run the lines below to close it manually.
#rD <- rsDriver(verbose = FALSE,port=4444L)
#remDr <- rD$client
#remDr$close()
# Linearly map `x` from its observed range onto the interval `to`
# (`to[1]` receives min(x), `to[2]` receives max(x)). Implemented in base R
# because no attached package exports a `rescale()` function.
.gh_rescale <- function(x, to) {
  if (length(x) == 0L || all(is.na(x))) {
    return(as.numeric(x))
  }
  from <- range(x, na.rm = TRUE)
  if (isTRUE(all.equal(from[1], from[2]))) {
    # Degenerate input range: put every point at the midpoint of the target.
    return(ifelse(is.na(x), NA_real_, mean(to)))
  }
  (x - from[1]) / diff(from) * diff(to) + to[1]
}

# Parse "mm/dd" axis labels into Dates. The labels carry no year, so the year
# of `today` is assumed; labels that would land more than one day in the
# future are taken to belong to the previous year (December labels read in
# January). The one-day slack absorbs time-zone differences.
.gh_parse_tick_dates <- function(labels, today = Sys.Date()) {
  labels <- trimws(labels)
  year <- as.integer(format(today, "%Y"))
  dates <- as.Date(paste(year, labels, sep = "/"), format = "%Y/%m/%d")
  ahead <- !is.na(dates) & dates > today + 1
  if (any(ahead)) {
    dates[ahead] <- as.Date(
      paste(year - 1L, labels[ahead], sep = "/"),
      format = "%Y/%m/%d"
    )
  }
  dates
}

# Turn the <g> children of one traffic graph into a long data frame with the
# columns type, date, metric and value. Returns NULL when the graph is absent
# or does not have the five child groups this code indexes into.
.gh_traffic_series <- function(dat, type) {
  if (length(dat) < 5L) {
    return(NULL)
  }

  group_labels <- function(node) {
    vapply(XML::getNodeSet(node, "g"), XML::xmlValue, character(1))
  }
  tick_range <- function(node) {
    # Drop thousands separators so labels such as "1,000" stay numeric.
    ticks <- as.numeric(gsub(",", "", group_labels(node), fixed = TRUE))
    range(ticks, na.rm = TRUE)
  }
  circle_cy <- function(node) {
    as.numeric(vapply(
      XML::getNodeSet(node, "circle"),
      XML::xmlGetAttr,
      character(1),
      name = "cy",
      default = NA_character_
    ))
  }

  # Child groups are used positionally: 1 = date labels, 2 and 5 = tick labels
  # of the "total" and "unique" axes, 3 and 4 = the corresponding point series.
  # NOTE(review): this layout is assumed from the page markup; confirm if
  # GitHub changes the traffic graphs.
  total_cy <- circle_cy(dat[[3]])
  unique_cy <- circle_cy(dat[[4]])
  if (length(total_cy) == 0L) {
    return(NULL)
  }

  series <- data.frame(
    type = type,
    date = .gh_parse_tick_dates(group_labels(dat[[1]])),
    total = total_cy,
    unique = unique_cy,
    stringsAsFactors = FALSE
  )

  # SVG y coordinates grow downwards, so the smallest `cy` is mapped to the
  # largest tick value and vice versa. The data range is taken to span the
  # tick range, which is an approximation.
  series$total <- .gh_rescale(series$total, rev(tick_range(dat[[2]])))
  series$unique <- .gh_rescale(series$unique, rev(tick_range(dat[[5]])))

  reshape2::melt(series, id.vars = c("type", "date"), variable.name = "metric")
}

#' Scrape GitHub traffic graphs for several repositories into one plot
#'
#' Starts a Selenium session, signs in to GitHub, visits the traffic page of
#' every repository in `repos`, reads the "clones" and "visitors" graphs from
#' the rendered page and combines them into a single faceted ggplot.
#'
#' @param gh_user GitHub login name entered into the sign-in form.
#' @param gh_pass GitHub password entered into the sign-in form.
#' @param gh_team Owner (user or team) of the repositories; also used in the
#'   plot title.
#' @param repos Character vector of repository names under `gh_team`.
#' @return A ggplot object with one line per repository, faceted by graph
#'   type (rows) and metric (columns).
github_traffic <- function(gh_user, gh_pass, gh_team, repos) {
  if (length(repos) == 0L) {
    stop("`repos` must contain at least one repository name.", call. = FALSE)
  }

  traffic_url <- function(repo) {
    sprintf("https://github.com/%s/%s/graphs/traffic", gh_team, repo)
  }

  rD <- rsDriver(verbose = FALSE)
  remDr <- rD[["client"]]
  # Always release the browser and the Selenium server, even when scraping
  # fails part-way; errors raised during cleanup itself are ignored on purpose.
  on.exit({
    try(remDr$close(), silent = TRUE)
    try(rD[["server"]]$stop(), silent = TRUE)
  }, add = TRUE)

  # Requesting a traffic page while signed out lands on the sign-in form.
  remDr$navigate(traffic_url(repos[1]))
  login_field <- remDr$findElement(using = "id", value = "login_field")
  login_field$setElementAttribute(attributeName = "value", value = gh_user)
  password_field <- remDr$findElement(using = "id", value = "password")
  password_field$setElementAttribute(attributeName = "value", value = gh_pass)
  submit_button <- remDr$findElement(
    using = "xpath",
    value = '//*[@id="login"]/form/div[4]/input[3]'
  )
  submit_button$clickElement()
  Sys.sleep(1)

  graph_types <- c("clones", "visitors")
  out <- plyr::llply(repos, function(repo) {
    remDr$navigate(traffic_url(repo))
    # Give the page a moment to draw the SVG graphs before reading the source.
    Sys.sleep(1)
    page <- XML::htmlParse(remDr$getPageSource()[[1]], asText = TRUE)
    graphs <- lapply(graph_types, function(type) {
      XML::getNodeSet(
        page,
        sprintf('//*[@id="js-%s-graph"]/div/div[1]/svg/g/g', type)
      )
    })
    names(graphs) <- graph_types
    graphs
  }, .progress = "text")
  names(out) <- repos

  plot_data <- plyr::ldply(out, function(repo) {
    per_type <- plyr::ldply(names(repo), function(type) {
      .gh_traffic_series(repo[[type]], type)
    })
    if (nrow(per_type) == 0L) {
      return(NULL)
    }
    per_type
  }, .id = "repo")

  if (nrow(plot_data) == 0L) {
    stop(
      "No traffic data could be read; check the credentials, team and ",
      "repository names.",
      call. = FALSE
    )
  }

  ggplot(plot_data, aes(x = date, y = value, colour = repo)) +
    geom_point() +
    geom_line() +
    facet_grid(type ~ metric, scales = "free_y") +
    scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90), legend.position = "top") +
    labs(title = sprintf("Github Team: %s", gh_team))
}
# Run the scraper with the settings defined above and keep the resulting plot.
traffic_plot <- github_traffic(
  gh_user = gh_user,
  gh_pass = gh_pass,
  gh_team = gh_team,
  repos = repos
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment