Last active
February 22, 2017 15:21
-
-
Save actuaryactually/deb659699f93b27e9d81a3c310b98e3c to your computer and use it in GitHub Desktop.
Quake Visualisations (QV)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PRELIMINARIES ----
# Install pacman only when it is missing, so that re-running the script does
# not reinstall it (and hit the network) on every execution.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# Load the packages used by this script through pacman.
pacman::p_load(XML, rvest, magrittr, RCurl, stringi)
# Step 1: Establish location names and addresses for Harvey Norman Electrical
# stores.
# Draw in address locations from the store-finder page.
url <- "http://www.harveynorman.co.nz/store-finder.html"
rvest_target <- read_html(url)
rvest_table_nodes <- html_nodes(rvest_target, "p.address ")
# Fail early with a clear message if the selector no longer matches anything
# (e.g. the page layout changed); otherwise the cleaning steps further down
# would run on an empty result.
if (length(rvest_table_nodes) == 0) {
  stop("No 'p.address' nodes found at ", url, "; check the CSS selector.",
       call. = FALSE)
}
# trim = TRUE strips leading/trailing whitespace from each node's text.
addresses <- html_text(rvest_table_nodes, trim = TRUE)
## Note - https://cran.r-project.org/web/packages/rvest/vignettes/selectorgadget.html
## This link contains details on how to use the SelectorGadget tool to
## identify CSS markers on webpages.
# Remove nearly all html carriage returns and formatting: each address becomes
# a character vector of the pieces found between " \r\n\t" sequences.
addresses <- strsplit(addresses, " \r\n\t")
# Write output to a one-column matrix in which "View Map" and other spaces are
# removed.
size.reqd <- length(addresses)
output.addresses <- matrix(
  vapply(
    addresses,
    function(parts) {
      # Append ", " to every piece and concatenate them into one string.
      # (paste0() has no `sep` argument, so the suffix is passed explicitly.)
      joined <- paste0(parts, ", ", collapse = "")
      # Keep only the text before the first "View Map," marker; taking element
      # [1] explicitly avoids relying on a length-mismatch warning when the
      # split yields more than one piece.
      strsplit(joined, "View Map,")[[1]][1]
    },
    character(1),
    USE.NAMES = FALSE
  ),
  nrow = size.reqd,
  ncol = 1
)
print(output.addresses)
# Scan the printed output for errors, then manually correct them.
# NB: rows 2 and 37 still contain embedded line breaks. These row numbers come
# from a visual check of the scraped output, so re-check them if the page
# changes.
if (length(output.addresses) < 37) {
  stop("Expected at least 37 scraped addresses; re-check which rows need ",
       "manual correction.", call. = FALSE)
}
# Each row is split on its leftover break sequence and re-joined with ", ".
# `collapse` places the separator only between pieces, so there is no trailing
# ", " to trim and no hard-coded character count to keep in sync with the page.
# Row 2 first:
temp <- unlist(strsplit(output.addresses[2], "\r\n\t"))
output.addresses[2] <- paste(temp, collapse = ", ")
# Then correct row 37:
temp <- unlist(strsplit(output.addresses[37], "\r\n"))
output.addresses[37] <- paste(temp, collapse = ", ")
rm(temp)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment