Skip to content

Instantly share code, notes, and snippets.

@srbhbook
Last active September 22, 2016 20:50
Show Gist options
  • Save srbhbook/a22b51dfc5a7ca303ae157a99608986b to your computer and use it in GitHub Desktop.
Save srbhbook/a22b51dfc5a7ca303ae157a99608986b to your computer and use it in GitHub Desktop.
library(RCurl)
library(magrittr)
library(rvest)
library(data.table)
# Cricinfo match ids of the scorecards to download, in fetch order.
# Kept as character strings because they are pasted straight into URLs.
# The vector is wrapped ten ids per line so that no string literal can be
# split across a line break (a split literal silently embeds a newline in
# the id and yields a broken URL).
matchids <- c(
  "62605", "62616", "62617", "62618", "62638", "62639", "62640", "62662", "62663", "62664",
  "62676", "62677", "62678", "62679", "62680", "62690", "62691", "62692", "62693", "62694",
  "62725", "62726", "62727", "62728", "62729", "62737", "62738", "62739", "62740", "62741",
  "62742", "62743", "62744", "62745", "62751", "62752", "62753", "62754", "62755", "62782",
  "62783", "62784", "62785", "62786", "62802", "62803", "62804", "62805", "62806", "62817",
  "62818", "62819", "62845", "62846", "62847", "62848", "62849", "62860", "62861", "62862",
  "62863", "62864", "62868", "62869", "62870", "62871", "62872", "62883", "62884", "62885",
  "62886", "62887", "62901", "62902", "62903", "62904", "62905", "62911", "62912", "62913",
  "62914", "62915", "62939", "62940", "62941", "62942", "62943", "62952", "62953", "62954",
  "62965", "62966", "62967", "62968", "62996", "62997", "62998", "63004", "63005", "63006",
  "63010", "63011", "63012", "63013", "63019", "63020", "63021", "63022", "63045", "63046",
  "63047", "63051", "63052", "63053", "63054", "63055", "63066", "63067", "63068", "63069",
  "63070", "63076", "63077", "63078", "63089", "63090", "63091", "63092", "63093", "63125",
  "63126", "63127", "63131", "63132", "63133", "63134", "63135", "63156", "63157", "63158",
  "63159", "63160", "63161", "63162", "63171", "63172", "63173", "63174", "63175", "63176",
  "63177", "63178", "63195", "63196", "63197", "63198", "63199", "63217", "63218", "63219",
  "63226", "63227", "63228", "63229", "63230", "63231", "63237", "63238", "63239", "63240",
  "63241", "63242", "63243", "63244", "63245", "63246", "63247", "63248", "63249", "63250",
  "63251", "63252", "63262", "63279", "63280", "63281", "63286", "63287", "63288", "63298",
  "63299", "63300", "63301", "63302", "63303", "63314", "63315", "63316", "63320", "63329",
  "63330", "63331", "63332", "63333", "63334", "63335", "63336", "63337", "63338", "63339",
  "63347", "63348", "63349", "63350", "63351", "63352", "63353", "63354", "63355", "63381",
  "63382", "63391", "63392", "63393", "63394", "63395", "63409", "63410", "63411", "63418",
  "63419", "63420", "63432", "63433", "63434", "63438", "63439", "63440", "63449", "63450",
  "63451", "63452", "63453", "63454", "63455", "63456", "63466", "63467", "63468", "63469",
  "63493", "63494", "63495", "63503", "63504", "63505", "63506", "63513", "63514", "63515",
  "63516", "63523", "63524", "63525", "63534", "63535", "63536", "63548", "63563", "63564",
  "63565", "63566", "63567", "63583", "63586", "63587", "63588", "63589", "63598", "63599",
  "63600", "63604", "63615", "63616", "63617", "63631", "63632", "63633", "63645", "63660",
  "63661", "63662", "63694", "63695", "63696", "63713", "63714", "63715", "63721", "63724",
  "63725", "63726", "63736", "63737", "63738", "63745", "63746", "63747", "63748", "63749",
  "63762", "63763", "63775", "63776", "63777", "63794", "63795", "63796", "63813", "63826",
  "63827", "63828", "63829", "63830", "63831", "63848", "63849", "63850", "63865", "63866",
  "63867", "63870", "63871", "63898", "63905", "63906", "63919", "63920", "63921", "63934",
  "63935", "63943", "63944", "63945", "63951", "63952", "63961", "63962", "63963", "63976",
  "63977", "63984", "63985", "63986", "63987", "63988", "63997", "63998", "63999", "64000",
  "64004", "64005", "64006", "64020", "64021", "64046", "64047", "64059", "64060", "64061",
  "64062", "64081", "64082", "64083", "64099", "64100", "64101", "64102", "64109", "64110",
  "64111", "64112", "64125", "64126", "64127", "219062", "219613", "226361", "226362", "226363",
  "232615", "233797", "234783", "239025", "238186", "238187", "239920", "239921", "239922", "239923",
  "249215", "249216", "249217", "282691", "282692", "258468", "258469", "258470", "297806", "297807",
  "297808", "291351", "291352", "291353", "291354", "332911", "332912", "332913", "343729", "343730",
  "343731", "345669", "345670", "345671", "345672", "361050", "361051", "366628", "386496", "366629",
  "430881", "430882", "430883", "434256", "434257", "441825", "441826", "456669", "456670", "456671",
  "464526", "464527", "464531", "464532", "464533", "463146", "463147", "463148", "489226", "489227",
  "489228", "474472", "474473", "474474", "474475", "535997", "535998", "535999", "518950", "518951",
  "518952", "518953", "565817", "565818", "565806", "565807", "565808", "565809", "598812", "598813",
  "598814", "598815", "676525", "676527", "648665", "648667", "667651", "667653", "667711", "667713",
  "667715", "667717", "667719", "754737", "754739", "754741", "754743", "870729", "895773", "895775",
  "895777", "903603", "903605", "903607", "903609", "1022593", "1022595", "1022597", "1022599", "1030213"
)
# One "averages" scorecard URL per match id.
urls <- paste0(
  "http://www.espncricinfo.com/west-indies-v-india-2016/engine/match/",
  matchids,
  ".html?view=averages"
)

# Indices of URLs whose download failed; appended to by the error handler.
urlerror <- c()

# Raw HTML for each URL. Preallocated to length(urls) so that a failed
# download leaves a NULL placeholder at its index instead of a shorter list.
raw <- vector("list", length(urls))

# Iterate over the URLs that actually exist rather than a hard-coded 1:500,
# so the loop stays correct if the id list grows or shrinks.
for (i in seq_along(urls)) {
  tryCatch({
    print(i) # progress indicator
    raw[[i]] <- getURL(
      urls[i],
      httpheader = c("User-Agent" = "Mosilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11")
    )
  }, error = function(e) {
    # `<<-` is required here: the handler is a function, so a plain `<-`
    # would only create a local copy of urlerror.
    urlerror <<- c(urlerror, i)
    cat("ERROR :", i, " : ", conditionMessage(e), "\n")
  })
}
# Indices of pages that could not be parsed; appended to by the error handler.
parseerror <- c()

# One table per page with columns i (page index), V2 (link text) and
# V3 (player id captured from the link href). Preallocated so that pages
# that fail to parse stay NULL at their index.
clean <- vector("list", length(urls))

# Walk every URL index (not a hard-coded 1:500). A page that was never
# downloaded makes the pipeline raise an error, which is recorded in
# parseerror below.
for (i in seq_along(urls)) {
  tryCatch({
    print(i) # progress indicator
    clean[[i]] <- raw[[i]] %>%
      xml2::read_html() %>%
      html_nodes(".careeravg-table") %>%
      `[`(. %like% "India") %>% # keep only tables whose markup mentions India
      `[`(1) %>%                # ... and of those, the first one
      html_nodes(".data-link") %>%
      # magrittr passes the node set as the first (unnamed) column as well,
      # so the text lands in column 2 and the player id in column 3.
      data.table(
        html_text(.),
        html_attr(., "href") %>%
          stringr::str_match(".*player/(.*).html") %>%
          `[`(, 2)
      ) %>%
      `[`(, 2:3, with = FALSE) %>%
      cbind(i, .)
  }, error = function(e) {
    # `<<-` is required: the handler runs in its own function scope.
    parseerror <<- c(parseerror, i)
    cat("ERROR :", i, " : ", conditionMessage(e), "\n")
  })
}

# Stack all per-page tables in one pass; rbindlist skips the NULL entries
# left by failed pages and avoids the repeated copying of Reduce(rbind, ...).
d1 <- rbindlist(clean)
# Incidence matrix: one row per page index (i), one column per player id (V3);
# each cell counts the rows of d1 for that page/player combination.
imat <- table(d1[, list(i, V3)])

# Number of distinct players, taken from the data instead of a hard-coded 285.
n_players <- ncol(imat)

# Every ordered pair of player columns: n_players^2 rows, including the
# V1 == V2 diagonal, which is filtered out below.
dt <- CJ(seq_len(n_players), seq_len(n_players))
dt[, row := .I]

# Brute-force comparison for each pair: s1 counts the pages where player V1
# has a higher count than player V2. s1 == 0 therefore means V2 appears on
# every page that V1 appears on.
dt[, s1 := sum(imat[, V1] > imat[, V2]), row]

# Column indices (into imat) of the dominating and dominated players.
baap <- unique(dt[s1 == 0][V1 != V2]$V2)
beta <- unique(dt[s1 == 0][V1 != V2]$V1)

# baap "dominated" beta
length(beta) # 213 in the author's run
## Minimum players to span all matches (greedy set cover)

# Amount added to the result for each player picked by setcover().
count <- 1
# Player ids picked by setcover(), in pick order; filled in as a side effect.
players <- c()

# Greedy set cover over a (match, player) table.
#
# Repeatedly picks the player (V3) with the most rows in the remaining table,
# appends that id to the global `players`, and drops every match (i) the
# player appears in, until no rows remain.
#
# set: data.table with at least the columns i (match index) and V3 (player id).
# Returns `count` times the number of players picked (i.e. the number of
# picks while count is 1); 0 for an empty table.
setcover <- function(set) {
  # Nothing left to cover: stop without recording a bogus NA player.
  if (nrow(set) == 0) {
    return(0)
  }

  # Player with the most remaining rows; ties go to the first one in the table.
  best <- set[, .N, V3][order(-N)][1]$V3
  players <<- c(players, best)

  # Remove all rows of every match that `best` appears in.
  remaining <- set[!i %in% set[V3 == best, 1, i]$i]

  # The original `count++` followed by the recursive call is not an increment
  # in R; it parses as `count + (+setcover(newset))`. Spell the addition out.
  count + setcover(remaining)
}
setcover(d1)
# `players` is a plain character vector of player ids in pick order, so it
# cannot be modified with data.table's `:=`. Build the lookup table
# explicitly: row is the pick order, N is the number of matches that pick
# covered for the first time.
covered <- c()
newly_covered <- integer(length(players))
for (k in seq_along(players)) {
  # Loop index is `k`, not `i`, so it cannot be mistaken for d1's column i.
  appearances <- unique(d1[V3 == players[k]]$i)
  newly_covered[k] <- length(setdiff(appearances, covered))
  covered <- union(covered, appearances)
}
picked <- data.table(
  V3 = as.character(players),
  row = seq_along(players),
  N = newly_covered
)

# Attach the display name(s) (V2 in d1, returned as V1) to each picked id.
output <- merge(picked, d1[, unique(V2), V3], by = "V3")
output[order(row)]

# check: the newly covered matches should add up to the number of distinct
# matches in d1. De-duplicate per player first, because a player id with more
# than one name spelling occupies several rows of `output`.
unique(output[, list(V3, N)])[, sum(N)]
output[order(row)][, list(.GRP, Player = V1), N]
#oldcode
#ll<-lapply(1:length(players),function(i) {
# p=players[i]
# list(d1[V3==p][1,V2],d1[V3==p][,length(unique(i))])
#})
#data.table(unlist(ll))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment