Skip to content

Instantly share code, notes, and snippets.

@mrchypark
Last active October 13, 2017 15:27
Show Gist options
  • Save mrchypark/7034677a48e78284df574abaf8618e46 to your computer and use it in GitHub Desktop.
Save mrchypark/7034677a48e78284df574abaf8618e46 to your computer and use it in GitHub Desktop.
library(rvest)
library(stringr)
# Scrape the paginated used-car listing and collect every row into `dat`,
# a data frame with columns title, transmission, fuel, distance and price.

# Listing URL without the page number, and the number of pages to fetch.
base_url <- "http://www.bobaedream.co.kr/cyber/CyberCar.php?gubun=K&page="
last_page <- 2005

# Download one listing page and return its rows as a data frame.
scrape_page <- function(page) {
  usedCar <- read_html(paste0(base_url, page))

  title <- usedCar %>%
    html_nodes("td.carinfo a.title") %>%
    html_text()

  # Each "a.sub_01" text is split on "ㅣ"; the pieces are laid out three per
  # row and read below as transmission, fuel and distance.
  info <- usedCar %>%
    html_nodes("a.sub_01") %>%
    html_text() %>%
    strsplit("ㅣ") %>%
    unlist() %>%
    matrix(ncol = 3, byrow = TRUE)

  transmission <- info[, 1]
  fuel <- info[, 2]

  # Remove the first " <lowercase letters>" run (presumably the unit suffix
  # -- TODO confirm against the page) and the commas before converting.
  distance <- info[, 3] %>%
    str_replace(" [a-z]+", "") %>%
    str_replace_all(",", "") %>%
    as.numeric()

  price <- usedCar %>%
    html_nodes("td.price em") %>%
    html_text() %>%
    str_replace_all(",", "") %>%
    as.numeric()

  data.frame(title, transmission, fuel, distance, price)
}

# Preallocate one slot per page; binding once at the end avoids re-copying
# the growing data frame on every iteration.
pages <- vector("list", last_page)
for (i in seq_len(last_page)) {
  page_data <- tryCatch(
    scrape_page(i),
    error = function(e) {
      # Report the failure right away and keep going so that one bad page
      # does not discard the pages already collected.
      warning(
        "Skipping page ", i, ": ", conditionMessage(e),
        call. = FALSE, immediate. = TRUE
      )
      NULL
    }
  )
  if (is.null(page_data)) {
    next
  }
  # Assign only non-NULL results: `pages[[i]] <- NULL` would delete the slot.
  pages[[i]] <- page_data
  print(i)
}

# Drop the slots of skipped pages, then bind everything in a single call.
# With no successful page this yields NULL, the same as the initial `c()`.
dat <- do.call(rbind, Filter(Negate(is.null), pages))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment