@actuaryactually
Last active February 20, 2017 04:56
Quake Visualisations
#PRELIMINARIES:
install.packages("pacman")
pacman::p_load(XML,rvest,magrittr,RCurl,stringi)
#Step 1: Establish location names and addresses for Harvey Norman Electrical stores
#draw in address locations from website:
url<-"http://www.harveynorman.co.nz/store-finder.html"
rvest_target<-read_html(url)
rvest_table_nodes <- html_nodes(rvest_target,"p.address ")
addresses<-html_text(rvest_table_nodes,TRUE)
##Note - https://cran.r-project.org/web/packages/rvest/vignettes/selectorgadget.html
## This link contains details on how to use the SelectorGadget tool to identify CSS markers on webpages
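#quick sanity check (a sketch): the selector should return one node per store, so the two
#counts below should match; if the site layout changes, this is the first thing to re-check
length(rvest_table_nodes)
length(addresses)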
#remove nearly all html carriage returns and formatting
addresses<-strsplit(addresses," \r\n\t")
#write output to a matrix in which view map and other spaces are removed:
size.reqd<-length(addresses)
output.addresses<- matrix(nrow=size.reqd,ncol=1)
for (i in 1:length(addresses)){
  output.addresses[i]<-unlist(strsplit(paste0(unlist(addresses[i]),sep = ", ", collapse = ""),"View Map,"))
}
print(output.addresses)
#scan output for errors then manually correct: NB rows 2 and 37
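#an automated way to flag the problem rows (a sketch): any address still containing
#control characters needs a manual fix - in this run that should flag rows 2 and 37
grep("[\r\n\t]", output.addresses)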
#row 2 first:
temp<-paste0(unlist(strsplit(output.addresses[2],"\r\n\t")),sep = ", ", collapse = "")
nchar(temp) #66 characters long, so retain all but last two to purge extra comma
output.addresses[2]= substr(temp,start=1, stop=64)
rm(temp)
#then correct row 37:
temp<-paste0(unlist(strsplit(output.addresses[37],"\r\n")),sep = ", ", collapse = "")
nchar(temp) #check the length, then retain all but the trailing ", " to purge the extra comma (here the string is trimmed to 46 characters)
output.addresses[37]= substr(temp,start=1, stop=46)
rm(temp)
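#the same fix could be written more generally (a sketch): split on any run of control
#characters and join with ", ", so there is no trailing comma to trim by counting characters
tidy.address <- function(x) {
  parts <- trimws(unlist(strsplit(x, "[\r\n\t]+")))
  paste(parts[parts != ""], collapse = ", ")
}
#e.g. tidy.address(output.addresses[37]) should give an equivalent result to the manual fix above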
#PRELIMINARIES:
pacman::p_load(XML)
#recode as a data frame:
output.addresses.df<-data.frame(c(output.addresses))
output.addresses.df<-cbind(output.addresses.df,long=1,lat=2)
#assign long/lat
for (i in 1:length(output.addresses)){
  addr <-output.addresses[i]
  url = paste('http://maps.google.com/maps/api/geocode/xml?address=', addr,'&sensor=false',sep='') # construct the URL
  doc = xmlTreeParse(url)
  root = xmlRoot(doc)
  output.addresses.df[i,2] = xmlValue(root[['result']][['geometry']][['location']][['lng']])
  output.addresses.df[i,3] = xmlValue(root[['result']][['geometry']][['location']][['lat']])
}
print(output.addresses.df)
#rows 35 and 38 have problems - 35 returns long/lat values that are way off, and 38 throws an error.
#Manually searching Google Maps shows that the address for each of these rows needs a slight change.
#Overwriting with the following approach solves the problem....
addr <-" Cnr West Street & Moore Street, Ashburton"
url = paste('http://maps.google.com/maps/api/geocode/xml?address=', addr,'&sensor=false',sep='') # construct the URL
doc = xmlTreeParse(url)
root = xmlRoot(doc)
output.addresses.df[35,2] = xmlValue(root[['result']][['geometry']][['location']][['lng']])
output.addresses.df[35,3] = xmlValue(root[['result']][['geometry']][['location']][['lat']])
addr <-"Cnr Maclaggan Street & Rattray Street, Dunedin"
url = paste('http://maps.google.com/maps/api/geocode/xml?address=', addr,'&sensor=false',sep='') # construct the URL
doc = xmlTreeParse(url)
root = xmlRoot(doc)
output.addresses.df[38,2] = xmlValue(root[['result']][['geometry']][['location']][['lng']])
output.addresses.df[38,3] = xmlValue(root[['result']][['geometry']][['location']][['lat']])
#all locations assigned now:
print(output.addresses.df)
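#the request/parse pattern above (build URL, parse XML, pull lng/lat) is repeated for each
#manual fix and again for the major towns below; a small helper like this (a sketch, using the
#same geocoding endpoint and URLencode to make the address URL-safe) would avoid the duplication
geocode.xml <- function(addr) {
  url  <- paste0('http://maps.google.com/maps/api/geocode/xml?address=', URLencode(addr, reserved = TRUE), '&sensor=false')
  root <- xmlRoot(xmlTreeParse(url))
  loc  <- root[['result']][['geometry']][['location']]
  c(long = as.numeric(xmlValue(loc[['lng']])), lat = as.numeric(xmlValue(loc[['lat']])))
}
#e.g. output.addresses.df[35, c("long","lat")] <- geocode.xml(" Cnr West Street & Moore Street, Ashburton")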
#format long lat as numbers (needed later)
colnames(output.addresses.df)<-c("Address","long","lat")
output.addresses.df$Address<-as.character(output.addresses.df$Address)
output.addresses.df$long<-as.numeric(output.addresses.df$long)
output.addresses.df$lat<-as.numeric(output.addresses.df$lat)
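#quick sanity check (a sketch): every geocoded point should fall inside a rough New Zealand
#bounding box (approx. longitude 166-179 E, latitude 34-47.5 S); anything flagged here
#needs the same manual treatment as rows 35 and 38 above
subset(output.addresses.df, long < 166 | long > 179 | lat < -47.5 | lat > -34)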
#major towns
town.names<-c("Wellington, New Zealand",
"Picton, New Zealand",
"Nelson, New Zealand",
"Blenheim, New Zealand")
major.towns.df<-data.frame(town.names)
major.towns.df<-cbind(major.towns.df,long=1,lat=2)
for (i in 1:length(town.names)){
  addr <-town.names[i]
  url = paste('http://maps.google.com/maps/api/geocode/xml?address=', addr,'&sensor=false',sep='') # construct the URL
  doc = xmlTreeParse(url)
  root = xmlRoot(doc)
  major.towns.df[i,2] = xmlValue(root[['result']][['geometry']][['location']][['lng']])
  major.towns.df[i,3] = xmlValue(root[['result']][['geometry']][['location']][['lat']])
}
print(major.towns.df)
town.names.short<-c("Wellington",
"Picton",
"Nelson",
"Blenheim")
major.towns.df$town.names<-town.names.short
major.towns.df$long<-as.numeric(major.towns.df$long)
major.towns.df$lat<-as.numeric(major.towns.df$lat)
#PRELIMINARIES:
pacman::p_load(data.table)
#the following site contains historic earthquake data for New Zealand: http://quakesearch.geonet.org.nz/
#Using the build query option on this page, we can look up quakes in specific regions and time frames.
#Let's look up Wellington's seismic profile since 1 Jan 2011 (the earliest start date in the queries below) - i.e. around six years' worth...
#three links are indicated, each covering a non-overlapping date range:
url1 <- 'http://quakesearch.geonet.org.nz/csv?region=wellington&minmag=1&startdate=2014-05-01&enddate=2017-1-30T17:00:00'
url2 <- 'http://quakesearch.geonet.org.nz/csv?region=wellington&minmag=1&startdate=2011-07-01&enddate=2014-05-01'
url3 <- 'http://quakesearch.geonet.org.nz/csv?region=wellington&minmag=1&startdate=2011-01-01T16:00:00&enddate=2011-07-01'
mydat1 <- fread(url1)
mydat2 <- fread(url2)
mydat3 <- fread(url3)
mydat <- rbind(mydat1,mydat2,mydat3)
#review download then tidy up:
head(mydat)
rm(mydat1,mydat2,mydat3)
colnames(mydat)[5] <- "long"
colnames(mydat)[6] <- "lat"
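#the three URLs above differ only in their date ranges; a parameterised version of the same
#query (a sketch, using the CSV endpoint and parameter names already shown above) makes it
#easy to pull other regions or periods
quake.query <- function(region, minmag, startdate, enddate) {
  fread(sprintf('http://quakesearch.geonet.org.nz/csv?region=%s&minmag=%s&startdate=%s&enddate=%s',
                region, minmag, startdate, enddate))
}
#e.g. quake.query("wellington", 1, "2011-07-01", "2014-05-01") reproduces mydat2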
pacman::p_load(ggmap,fitdistrplus,maps,mapproj,ggrepel)
wellington = c(lon = 174.7762, lat = -41.28646)
wellington.map=get_map(location=wellington,zoom=8,color="bw")
r <- ggmap(wellington.map,extent="panel",maprange=FALSE)%+% mydat + aes(x = long, y = lat)
r <- r + geom_density2d()
r <- r + stat_density2d(aes(fill = ..level.., alpha = ..level..), size = 0.1, bins = 100, geom = 'polygon')
r <- r + scale_fill_gradient(low = "green", high = "red") + scale_alpha(range = c(0.00, 0.25), guide = FALSE)
r <- r + geom_point(data=output.addresses.df, aes(x=long, y=lat),
color="blue",shape=15)
r <- r + labs(
title = "NZ EQ hotspots 1 Jan 2006 - Present Vs Exposure: (i.e. location of Harvey Norman Store Outlets)",
subtitle = "The Blenheim store appears to be closest to an active zone",
caption = "Sources: http://info.geonet.org.nz/ and http://www.harveynorman.co.nz/store-finder.html")
r
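#major.towns.df built earlier is not used in the code shown; one possible use (a sketch,
#since ggrepel is loaded above) is to label those towns on the same map:
r + geom_text_repel(data = major.towns.df, aes(x = long, y = lat, label = town.names), size = 3)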
pacman::p_load(fitdistrplus,reshape2,grid,gridExtra)
#Note - you need to have already run the prior 4 scripts and have the variables in memory
#severity over time:
s <- ggplot(data=mydat,aes(x=as.Date(origintime),y=magnitude))
s <- s + geom_point(alpha=1/25,color="coral1",shape=16) + geom_smooth(span=0.3,se=TRUE)
s <- s + labs(
title = "Over Time...",
subtitle = "One circle per quake. 25 quakes to produce a solid pink circle. Blue line shows smoothed average (GAM model)",
caption = "Source: http://info.geonet.org.nz/"
)
s <- s + xlab("") + ylab("Magnitude")
s <- s + theme_minimal()
#severity - note, you need to experiment with the binning to see the shape clearly (see the sketch after this block)
t <- ggplot(data=mydat,aes(x=magnitude))
t <- t + geom_histogram(aes(fill = ..count..), binwidth = 0.1) + scale_fill_gradient("Magnitude", low = "green", high = "red")
t <- t +labs(
title = "Observed variability",
caption = "Source: http://info.geonet.org.nz/"
)
t <- t + theme_minimal()
t <- t + xlab("Magnitude") + ylab("Count")
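#as noted above, the bin width is worth experimenting with (a sketch): draw the same
#histogram at a few candidate widths and compare the shapes
for (bw in c(0.05, 0.1, 0.25)) {
  print(ggplot(mydat, aes(x = magnitude)) +
          geom_histogram(binwidth = bw) +
          ggtitle(paste("binwidth =", bw)) +
          theme_minimal())
}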
#Try fitting the severity using parametric distribution:
fg<-fitdist(mydat$magnitude,"gamma")
fln<-fitdist(mydat$magnitude,"lnorm")
fw<-fitdist(mydat$magnitude,"weibull")
par(mfrow = c(2, 2))
plot.legend <- c("Weibull", "lognormal", "gamma")
denscomp(list(fw, fln, fg), legendtext = plot.legend)
qqcomp(list(fw, fln, fg), legendtext = plot.legend)
cdfcomp(list(fw, fln, fg), legendtext = plot.legend)
ppcomp(list(fw, fln, fg), legendtext = plot.legend)
##Source: https://cran.r-project.org/web/packages/fitdistrplus/vignettes/paper2JSS.pdf
#Check GOF using AIC...
fln$aic
fg$aic
fw$aic
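#collecting the AICs into one named, sorted vector makes the comparison easier to read
#(a sketch); fitdistrplus::gofstat reports the same comparison plus other GOF statistics
sort(c(lognormal = fln$aic, gamma = fg$aic, weibull = fw$aic))
gofstat(list(fw, fln, fg), fitnames = c("Weibull", "lognormal", "gamma"))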
#the lognormal AIC is lowest, which agrees with the visual Q-Q plot
#Compare simulated vs observed magnitudes:
par(mfrow = c(1, 1))
set.seed(3)
sim.magnitudes<-rlnorm(10000,meanlog = fln$estimate[1],sdlog = fln$estimate[2])
x<-data.frame(actual=mydat$magnitude,simulated=sim.magnitudes[1:length(mydat$magnitude)])
quake.df<-melt(x) #puts from wide to long format
u <- ggplot(data=quake.df,aes(x=value,fill=variable))+geom_density(alpha=0.25)
u <- u +labs(
title = "Observed vs Simulated LgN",
caption = "Source: http://info.geonet.org.nz/"
)
u <- u + theme_minimal()
u <- u + xlab("Magnitude") + ylab("Density")
grid.arrange(s,t,u)