Last active
October 3, 2017 02:11
-
-
Save yonicd/a05c4d85f7884c517a6cfa6aff24f59a to your computer and use it in GitHub Desktop.
Function that scrapes GitHub and combines the traffic of several repositories into a single plot
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(RSelenium) | |
| library(XML) | |
| library(ggplot2) | |
| library(reshape2) | |
| library(plyr) | |
| library(dplyr) | |
# Placeholders -- replace each value before running the script.
# `repos` may be a character vector holding several repository names.
gh_user <- "<your github login name>"
gh_pass <- "<your github login password>"
gh_team <- "<team associated with account>"
repos <- "<repositories in team>"

# Failsafe: if github_traffic() fails part-way and leaves the port occupied,
# run the lines below by hand to close it.
# rD <- rsDriver(verbose = FALSE, port = 4444L)
# remDr <- rD$client
# remDr$close()
# Linearly map `x` from its own observed range onto the interval `to`
# (`to` may be decreasing, which flips the direction). Mirrors the behaviour
# of scales::rescale(x, to) without needing the scales package attached:
# a zero-width input range maps every non-missing value to mean(to).
.rescale_linear <- function(x, to) {
  from <- range(x, na.rm = TRUE)
  if (isTRUE(all.equal(from[1], from[2]))) {
    return(ifelse(is.na(x), NA_real_, mean(to)))
  }
  (x - from[1]) / (from[2] - from[1]) * (to[2] - to[1]) + to[1]
}

# Parse "%m/%d" axis labels into Dates. as.Date() fills in the current year,
# so labels scraped shortly after New Year would land ~12 months in the
# future; anything more than one day ahead of `today` is moved to the
# previous year (one day of slack allows for time-zone differences).
.parse_axis_dates <- function(labels, today = Sys.Date()) {
  dates <- as.Date(labels, format = "%m/%d")
  ahead <- !is.na(dates) & dates > today + 1
  if (any(ahead)) {
    prev_year <- as.integer(format(today, "%Y")) - 1L
    dates[ahead] <- as.Date(
      paste(prev_year, labels[ahead], sep = "/"),
      format = "%Y/%m/%d"
    )
  }
  dates
}

# Turn the <g> groups of one traffic graph into a long data frame with the
# columns type, date, metric ("total"/"unique") and value.
# The groups are read positionally: 1 = date axis, 2 = axis for totals,
# 3 = circles for totals, 4 = circles for uniques, 5 = axis for uniques.
# Returns NULL when the graph is missing or holds no points.
.traffic_series <- function(groups, type) {
  if (length(groups) < 5L) {
    return(NULL)
  }

  tick_labels <- function(node) {
    vapply(XML::getNodeSet(node, "g"), XML::xmlValue, character(1))
  }
  circle_cy <- function(node) {
    as.numeric(vapply(
      XML::getNodeSet(node, "circle"),
      XML::xmlGetAttr,
      character(1),
      name = "cy"
    ))
  }

  dates <- .parse_axis_dates(tick_labels(groups[[1]]))
  total_cy <- circle_cy(groups[[3]])
  unique_cy <- circle_cy(groups[[4]])
  if (length(dates) == 0L || length(total_cy) == 0L ||
      length(unique_cy) == 0L) {
    return(NULL)
  }

  total_ticks <- as.numeric(tick_labels(groups[[2]]))
  unique_ticks <- as.numeric(tick_labels(groups[[5]]))

  # SVG y coordinates grow downwards, so the smallest `cy` is the largest
  # value: map the pixel range onto the *reversed* tick range.
  wide <- data.frame(
    type = type,
    date = dates,
    total = .rescale_linear(total_cy, rev(range(total_ticks))),
    unique = .rescale_linear(unique_cy, rev(range(unique_ticks))),
    stringsAsFactors = FALSE
  )
  reshape2::melt(wide, id.vars = c("type", "date"), variable.name = "metric")
}

#' Plot GitHub traffic for several repositories in one figure
#'
#' Starts a Selenium session, signs in on the GitHub login form, visits the
#' traffic page of every repository in `repos`, reads the clones and visitors
#' graphs out of the page source and combines them into a single ggplot.
#' The Selenium client and server are shut down when the function exits,
#' whether it succeeds or fails.
#'
#' @param gh_user GitHub login name written into the login form.
#' @param gh_pass GitHub password written into the login form.
#' @param gh_team Owner (user or organisation) part of the repository URL.
#' @param repos Character vector of repository names under `gh_team`.
#' @param wait Seconds to pause after each page load before reading the page.
#' @return A ggplot object: points and lines per repository, faceted by
#'   graph type (clones/visitors) and metric (total/unique).
github_traffic <- function(gh_user, gh_pass, gh_team, repos, wait = 1) {
  stopifnot(
    is.character(repos), length(repos) > 0L,
    is.numeric(wait), length(wait) == 1L, wait >= 0
  )

  traffic_url <- function(repo) {
    sprintf("https://github.com/%s/%s/graphs/traffic", gh_team, repo)
  }

  rD <- RSelenium::rsDriver(verbose = FALSE)
  # Always release the server, even when a later step throws.
  on.exit(try(rD[["server"]]$stop(), silent = TRUE), add = TRUE)
  remDr <- rD[["client"]]
  # after = FALSE: close the browser before the server is stopped.
  on.exit(try(remDr$close(), silent = TRUE), add = TRUE, after = FALSE)

  # Requesting a traffic page first; the login form fields are then filled in.
  remDr$navigate(traffic_url(repos[1]))
  webElem <- remDr$findElement(using = "id", value = "login_field")
  webElem$setElementAttribute(attributeName = "value", value = gh_user)
  webElem <- remDr$findElement(using = "id", value = "password")
  webElem$setElementAttribute(attributeName = "value", value = gh_pass)
  # NOTE(review): positional XPath for the submit button; it depends on the
  # exact layout of the login form.
  webElem <- remDr$findElement(
    using = "xpath",
    '//*[@id="login"]/form/div[4]/input[3]'
  )
  webElem$clickElement()
  Sys.sleep(wait)

  graph_types <- c("clones", "visitors")
  per_repo <- plyr::llply(repos, function(repo) {
    remDr$navigate(traffic_url(repo))
    Sys.sleep(wait)
    # getPageSource() returns a list; the HTML string is its first element.
    page <- XML::htmlParse(remDr$getPageSource()[[1]], asText = TRUE)
    dplyr::bind_rows(lapply(graph_types, function(type) {
      groups <- XML::getNodeSet(
        page,
        sprintf('//*[@id="js-%s-graph"]/div/div[1]/svg/g/g', type)
      )
      .traffic_series(groups, type)
    }))
  }, .progress = "text")
  names(per_repo) <- repos

  plot_data <- dplyr::bind_rows(per_repo, .id = "repo")
  if (nrow(plot_data) == 0L) {
    stop(
      "No traffic graphs were found for the requested repositories ",
      "(the login may have failed or the page layout may have changed).",
      call. = FALSE
    )
  }
  # Keep the legend in the order the repositories were supplied.
  plot_data$repo <- factor(plot_data$repo, levels = unique(repos))

  ggplot2::ggplot(
    plot_data,
    ggplot2::aes(x = date, y = value, colour = repo)
  ) +
    ggplot2::geom_point() +
    ggplot2::geom_line() +
    ggplot2::facet_grid(type ~ metric, scales = "free_y") +
    ggplot2::scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
    ggplot2::theme_bw() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90),
      legend.position = "top"
    ) +
    ggplot2::labs(title = sprintf("Github Team: %s", gh_team))
}
# Build the combined traffic plot from the settings defined at the top of
# the script; the resulting ggplot object is kept in `traffic_plot`.
traffic_plot <- github_traffic(
  gh_user = gh_user,
  gh_pass = gh_pass,
  gh_team = gh_team,
  repos = repos
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment