Skip to content

Instantly share code, notes, and snippets.

@mrchypark
Created August 23, 2017 07:52
Show Gist options
  • Save mrchypark/043d7e6e6b321e4e02f80e13738b7232 to your computer and use it in GitHub Desktop.
Save mrchypark/043d7e6e6b321e4e02f80e13738b7232 to your computer and use it in GitHub Desktop.
Crawling ts-hikaku.com using R
library(rvest)
# Crawl the paginated company listing on ts-hikaku.com and write every scraped
# row to ./data.csv. Page URLs are built as <root><page number><tail>.
root <- "http://www.ts-hikaku.com/clist/a0/v1s22t0p"
tail <- ".html"
# Number of listing pages to fetch (pages 1..n_pages).
n_pages <- 34L
# Kept for R < 4.0, where data.frame() would otherwise turn text into factors.
options(stringsAsFactors = FALSE)

# Text of every node on `page` matching the CSS selector `css`, with the
# first `skip` matches dropped. Unlike `x[(skip + 1):length(x)]`, this yields
# an empty vector (not NA entries) when there are `skip` or fewer matches.
node_text <- function(page, css, skip = 0L) {
  txt <- html_text(html_nodes(page, css))
  txt[seq_along(txt) > skip]
}

# Download listing page `i` and return its rows as a data frame with the
# columns company, salary, location, age, marg, cnt, year and power.
# Stops with an error naming the page if the scraped fields do not line up.
scrape_page <- function(i) {
  print(i)
  page <- read_html(paste0(root, i, tail))

  # After the first 8 matches are skipped, the "col-sm-2" cells are treated as
  # four consecutive, equally long runs: age, marg, cnt, year (in that order).
  col4 <- node_text(page, "div.row div.col-sm-2", skip = 8L)
  if (length(col4) %% 4L != 0L) {
    stop(
      "page ", i, ": ", length(col4),
      " 'col-sm-2' cells cannot be split into 4 equal columns",
      call. = FALSE
    )
  }
  per_col <- length(col4) %/% 4L
  chunk <- function(k) col4[seq_len(per_col) + (k - 1L) * per_col]

  fields <- list(
    company = node_text(page, "div.panel-body div.row .col-sm-3 p a"),
    salary = node_text(page, "div.row div strong"),
    location = node_text(
      page, "div.row div.col-sm-3 div.row div.col-sm-7",
      skip = 2L
    ),
    age = chunk(1L),
    marg = chunk(2L),
    cnt = chunk(3L),
    year = chunk(4L),
    power = node_text(page, "div.row div.col-xs-12", skip = 2L)
  )

  # data.frame() silently recycles shorter columns when lengths divide, which
  # would pair values with the wrong company; fail loudly instead.
  n_rows <- lengths(fields)
  if (length(unique(n_rows)) != 1L) {
    stop(
      "page ", i, ": scraped fields have different lengths (",
      paste0(names(n_rows), "=", n_rows, collapse = ", "), ")",
      call. = FALSE
    )
  }

  as.data.frame(fields, stringsAsFactors = FALSE)
}

# Scrape every page first, then bind once: calling rbind() inside the loop
# re-copies the accumulated data frame on every iteration.
dat <- do.call(rbind, lapply(seq_len(n_pages), scrape_page))
write.csv(dat, "./data.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment