Last active
February 20, 2017 04:56
-
-
Save actuaryactually/a24304a074722ae5ac3452d2cef62c4a to your computer and use it in GitHub Desktop.
Quake Visualisations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PRELIMINARIES: ----
# Step 1: scrape location names and addresses for Harvey Norman Electrical
# stores from the store-finder page.
install.packages("pacman")
pacman::p_load(XML, rvest, magrittr, RCurl, stringi)

# Draw in address locations from the website:
url <- "http://www.harveynorman.co.nz/store-finder.html"
rvest_target <- read_html(url)
# The CSS selector "p.address " was identified with the SelectorGadget tool:
# https://cran.r-project.org/web/packages/rvest/vignettes/selectorgadget.html
rvest_table_nodes <- html_nodes(rvest_target, "p.address ")
addresses <- html_text(rvest_table_nodes, trim = TRUE)

# Remove nearly all HTML carriage returns and formatting:
addresses <- strsplit(addresses, " \r\n\t")

# Write output to a one-column matrix in which "View Map" and extra spaces are
# removed. (The original called paste0(..., sep = ", "); paste0 has no sep
# argument, so that string was silently pasted onto each element -- the
# explicit ", " below does the same thing on purpose.)
size.reqd <- length(addresses)
output.addresses <- matrix(nrow = size.reqd, ncol = 1)
for (i in seq_along(addresses)) {
  output.addresses[i] <- unlist(
    strsplit(paste0(unlist(addresses[[i]]), ", ", collapse = ""), "View Map,")
  )
}
print(output.addresses)

# Scan output for errors, then manually correct. NB rows 2 and 37 need fixing.
# Row 2 first:
temp <- paste0(unlist(strsplit(output.addresses[2], "\r\n\t")), ", ", collapse = "")
nchar(temp)  # 66 characters long, so retain all but the last two to purge the extra comma
output.addresses[2] <- substr(temp, start = 1, stop = 64)
rm(temp)

# Then correct row 37:
temp <- paste0(unlist(strsplit(output.addresses[37], "\r\n")), ", ", collapse = "")
nchar(temp)  # check the length; keep only the first 46 characters to drop trailing junk
output.addresses[37] <- substr(temp, start = 1, stop = 46)
rm(temp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PRELIMINARIES: ----
pacman::p_load(XML)

# Helper: query the Google Maps geocoding XML API for one address and return
# a named character vector c(long = ..., lat = ...). The caller converts to
# numeric. Extracted because the original script repeated this 6-line fetch
# four times verbatim.
# NOTE(review): this keyless endpoint is legacy; Google now requires an API
# key -- confirm the URL still responds before re-running.
geocode_address <- function(addr) {
  url <- paste0("http://maps.google.com/maps/api/geocode/xml?address=", addr, "&sensor=false")
  root <- xmlRoot(xmlTreeParse(url))
  location <- root[["result"]][["geometry"]][["location"]]
  c(long = xmlValue(location[["lng"]]), lat = xmlValue(location[["lat"]]))
}

# Recode the scraped addresses as a data frame with placeholder long/lat columns:
output.addresses.df <- data.frame(c(output.addresses))
output.addresses.df <- cbind(output.addresses.df, long = 1, lat = 2)

# Assign long/lat for each store address:
for (i in seq_along(output.addresses)) {
  coords <- geocode_address(output.addresses[i])
  output.addresses.df[i, 2] <- coords["long"]
  output.addresses.df[i, 3] <- coords["lat"]
}
print(output.addresses.df)

# Rows 35 and 38 have problems - 35 returns long/lats that are way off, and 38
# throws an error. Manually searching Google Maps shows each address needs a
# slight change; overwriting with the corrected address solves the problem.
coords <- geocode_address(" Cnr West Street & Moore Street, Ashburton")
output.addresses.df[35, 2] <- coords["long"]
output.addresses.df[35, 3] <- coords["lat"]

coords <- geocode_address("Cnr Maclaggan Street & Rattray Street, Dunedin")
output.addresses.df[38, 2] <- coords["long"]
output.addresses.df[38, 3] <- coords["lat"]

# All locations assigned now:
print(output.addresses.df)

# Name the columns and format long/lat as numbers (needed later for plotting):
colnames(output.addresses.df) <- c("Address", "long", "lat")
output.addresses.df$Address <- as.character(output.addresses.df$Address)
output.addresses.df$long <- as.numeric(output.addresses.df$long)
output.addresses.df$lat <- as.numeric(output.addresses.df$lat)

# Major towns near the quake zone, geocoded the same way:
town.names <- c("Wellington, New Zealand",
                "Picton, New Zealand",
                "Nelson, New Zealand",
                "Blenheim, New Zealand")
major.towns.df <- data.frame(town.names)
major.towns.df <- cbind(major.towns.df, long = 1, lat = 2)
for (i in seq_along(town.names)) {
  coords <- geocode_address(town.names[i])
  major.towns.df[i, 2] <- coords["long"]
  major.towns.df[i, 3] <- coords["lat"]
}
print(major.towns.df)

# Replace the full look-up strings with short display labels and make the
# coordinates numeric:
town.names.short <- c("Wellington",
                      "Picton",
                      "Nelson",
                      "Blenheim")
major.towns.df$town.names <- town.names.short
major.towns.df$long <- as.numeric(major.towns.df$long)
major.towns.df$lat <- as.numeric(major.towns.df$lat)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PRELIMINARIES: ----
pacman::p_load(data.table)

# The following site contains historic earthquake data for New Zealand:
# http://quakesearch.geonet.org.nz/
# Using the "build query" option on that page we can look up quakes in specific
# regions and time frames. Here we pull Wellington's seismic profile; the site
# splits the result into three links covering non-overlapping date ranges.
# NOTE(review): an earlier comment said "since 1 Jan 2006" but the URLs below
# start at 2011 -- confirm the intended window.
url1 <- 'http://quakesearch.geonet.org.nz/csv?region=wellington&minmag=1&startdate=2014-05-01&enddate=2017-1-30T17:00:00'
url2 <- 'http://quakesearch.geonet.org.nz/csv?region=wellington&minmag=1&startdate=2011-07-01&enddate=2014-05-01'
url3 <- 'http://quakesearch.geonet.org.nz/csv?region=wellington&minmag=1&startdate=2011-01-01T16:00:00&enddate=2011-07-01'
# Bug fix: the original downloaded url1 twice and called head(mydat) before
# mydat existed; the duplicate fetch and premature head() are removed.
mydat1 <- fread(url1)
mydat2 <- fread(url2)
mydat3 <- fread(url3)
mydat <- rbind(mydat1, mydat2, mydat3)

# Review the combined download, then tidy up the intermediates:
head(mydat)
rm(mydat1, mydat2, mydat3)

# Rename the longitude/latitude columns to match the address data frames:
colnames(mydat)[5] <- "long"
colnames(mydat)[6] <- "lat"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the mapping/plotting packages (pacman was installed in the first script):
pacman::p_load(ggmap, fitdistrplus, maps, mapproj, ggrepel)

# Centre a black-and-white basemap on Wellington:
wellington <- c(lon = 174.7762, lat = -41.28646)
wellington.map <- get_map(location = wellington, zoom = 8, color = "bw")

# Overlay quake density contours on the basemap, then mark the store locations
# as blue squares. Built as one layer chain rather than repeated reassignment.
r <- ggmap(wellington.map, extent = "panel", maprange = FALSE) %+%
  mydat +
  aes(x = long, y = lat) +
  geom_density2d() +
  stat_density2d(aes(fill = ..level.., alpha = ..level..),
                 size = 0.1, bins = 100, geom = 'polygon') +
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0.00, 0.25), guide = FALSE) +
  geom_point(data = output.addresses.df, aes(x = long, y = lat),
             color = "blue", shape = 15) +
  labs(
    title = "NZ EQ hotspots 1 Jan 2006 - Present Vs Exposure: (i.e. location of Harvey Norman Store Outlets)",
    subtitle = "The Blenheim store appears to be closest to an active zone",
    caption = "Sources: http://info.geonet.org.nz/ and http://www.harveynorman.co.nz/store-finder.html"
  )
r
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the analysis packages. Note: you need to have already run the prior
# 4 scripts so that mydat etc. are in memory.
pacman::p_load(fitdistrplus, reshape2, grid, gridExtra)

# Severity over time:
s <- ggplot(data = mydat, aes(x = as.Date(origintime), y = magnitude))
s <- s + geom_point(alpha = 1/25, color = "coral1", shape = 16) + geom_smooth(span = 0.3, se = TRUE)
s <- s + labs(
  title = "Over Time...",
  subtitle = "One circle per quake. 25 quakes to produce a solid pink circle. Blue line shows smoothed average (GAM model)",
  caption = "Source: http://info.geonet.org.nz/"
)
s <- s + xlab("") + ylab("Magnitude")
s <- s + theme_minimal()

# Severity histogram - note: experiment with the binning to see the shape clearly.
# Bug fix: the original added geom_histogram() twice, so a second histogram with
# default binning was drawn on top of the binwidth = 0.1 layer; a single layer
# now carries both the binwidth and the count-gradient fill.
t <- ggplot(data = mydat, aes(x = magnitude))
t <- t + geom_histogram(aes(fill = ..count..), binwidth = 0.1) +
  scale_fill_gradient("Magnitude", low = "green", high = "red")
t <- t + labs(
  title = "Observed variability",
  caption = "Source: http://info.geonet.org.nz/"
)
t <- t + theme_minimal()
t <- t + xlab("Magnitude") + ylab("Count")

# Try fitting the severity with candidate parametric distributions:
fg <- fitdist(mydat$magnitude, "gamma")
fln <- fitdist(mydat$magnitude, "lnorm")
fw <- fitdist(mydat$magnitude, "weibull")

# Compare the fits visually (density, QQ, CDF, PP) in a 2x2 panel.
# Source: https://cran.r-project.org/web/packages/fitdistrplus/vignettes/paper2JSS.pdf
par(mfrow = c(2, 2))
plot.legend <- c("Weibull", "lognormal", "gamma")
denscomp(list(fw, fln, fg), legendtext = plot.legend)
qqcomp(list(fw, fln, fg), legendtext = plot.legend)
cdfcomp(list(fw, fln, fg), legendtext = plot.legend)
ppcomp(list(fw, fln, fg), legendtext = plot.legend)

# Check goodness-of-fit using AIC...
fln$aic
fg$aic
fw$aic
# Lognormal is lowest, which agrees with the visual QQ plot.

# Compare simulated vs observed magnitudes:
par(mfrow = c(1, 1))
set.seed(3)
sim.magnitudes <- rlnorm(10000, meanlog = fln$estimate[1], sdlog = fln$estimate[2])
x <- data.frame(actual = mydat$magnitude,
                simulated = sim.magnitudes[seq_along(mydat$magnitude)])
quake.df <- melt(x)  # reshape from wide to long format
u <- ggplot(data = quake.df, aes(x = value, fill = variable)) + geom_density(alpha = 0.25)
u <- u + labs(
  title = "Observed vs Simulated LgN",
  caption = "Source: http://info.geonet.org.nz/"
)
u <- u + theme_minimal()
u <- u + xlab("Magnitude") + ylab("Density")

# Stack the three panels:
grid.arrange(s, t, u)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment