Created
September 22, 2010 00:45
-
-
Save johnDorian/590889 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################################################################### | |
### Aim: To test the significance of randomly allocating flow to 'midnight' water quality samples. | |
### Date Created: Thursday 2nd September 2010 | |
### Author: Jason Lessels | |
### Packages required: TSAgg_0.2-1,geoR | |
### Notes: The script can easily be modified for the other water quality parameters, and the amount of simulations. Things to check: | |
###Both WQ and discharge must have the same initial time stamp (hours) before aggregation. | |
###WQ variable modify lines 52,60,146,186 | |
###line 99 changes the amount of simulations to run. | |
###line 156 determines when the bushfire occurred. | |
###lines 200:220 need to specify the names of the likfit models. | |
###Aggregation, 77,40, and the switch statements in 110:130 - is explained. | |
##################################################################################################### | |
###Load the TSAgg package. | |
install.packages("geoR") | |
install.packages("TSAgg") | |
library(TSAgg) | |
###Set the working dir. | |
setwd("E:/4th YEAR PROJECT/data/Burke") | |
setwd("~/Documents/code/francesca/") | |
###load the quality data | |
quality<-read.csv("Burke_worked.csv",header=T) | |
###What is the format of the dates? | |
head(quality) | |
tail(quality) | |
###Load the flow data - takes awhile | |
flow<-read.table("2122052.TXT",header=T,sep=",")[,-3] | |
###Fix the names up | |
names(flow)<-c("date","discharge") | |
#get rid of missing values | |
flow<-flow[!is.na(flow[,2]),] | |
##No bug But everything is easier to work with when midnight is the first observation. | |
flow<-flow[-c(1:8),] | |
##Check out that all is well | |
head(flow) | |
###Convert the flow data into a timeSeries formatted data frame | |
flow<-timeSeries(flow$date,"%d/%m/%Y %H:%M",data=flow$discharge) | |
###The aggregation can take some time for long datasets like this one. | |
flow.3hr<-hoursAgg(flow,sum,3) | |
####Make sure all is well | |
head(flow.3hr) | |
tail(flow.3hr) | |
##Change the names of the aggregated data frame. | |
names(flow.3hr)<-c("date","discharge") | |
###format the time stamps | |
q2<-timeSeries(quality$date,"%d/%m/%Y %H:%M") | |
###Add the quality values to the new timeSeries object, as the timeSeries function dosen't allow for factors (Sample.Type) | |
q2<-data.frame(q2,quality[,3:7]) | |
###Get rid of any missing TP values, as this is what we are interested in. | |
q2<-q2[!is.na(q2$Phosphorus.Total..mg.L.),] | |
###Subset all samples that are collected between midnight and 12.59.59 | |
temp<-q2[q2[,"hour"]==0,] | |
###Remove all automatically collected samples, as it is highly possible for an automatic sample to have been collected at this time. | |
midnight.wq<-temp[temp[,"Sample.Type"]!="A",] | |
###Create a dataset that is the opposite of the above so the two can be joined after the random samples have been produced. This means that all samples not collected at midnight, or they have been collected using an Automatic sampler. | |
other.wq<-q2[q2[,"hour"]!=0|q2[,"Sample.Type"]=="A",] | |
###Grab the date and the TP column | |
other.wq<-other.wq[,c(1,12)] | |
head(other.wq) | |
tail(other.wq) | |
######################################################### | |
###Test to see if the above separation worked properly | |
###Check that all is well, by checking the lengths of the two subsetted data frames and comparing to that of the quality tp object (q2) | |
length(midnight.wq[,1]) | |
length(other.wq[,1]) | |
##Is the length of all quality samples the same as the length of the problem and non problem samples. | |
length(q2[,1])==(length(other.wq[,1])+length(midnight.wq[,1])) | |
######################################################### | |
###Merge the flow with the water quality that dosent have midnight observations. | |
###Sort the dates so they are in chronological order. | |
other.wq<-other.wq[order(other.wq[,1]),] | |
###Format the dates so we can change the hour for merging with flow | |
qual<-timeSeries(other.wq$dates,"%Y-%m-%d %H:%M:%S") | |
###Get the aggregated hourly block that the quality observation falls into in respect to the hourly aggregation of the flow. | |
temphour<-floor(qual$hour/3)*3 | |
###Create a temp time stamp for merging with the flow - this will be changed back after the merging takes place | |
temptime<-strptime(paste(qual$day,qual$month,qual$year,temphour,0),"%d %m %Y %H %M",tz="GMT") | |
#Create a temp qual object for th merging with the new aggregated dates. | |
tempqual<-data.frame(date=temptime,tp=other.wq[,2]) | |
###Merge the quality with flow | |
temp<-merge(flow.3hr,tempqual,by="date",all.y=T) | |
#Now change the dates back to the original time stamp of the quality observation | |
temp$date<-qual$date | |
###Now get rid of the missing flow values. | |
other.wq<-temp[!is.na(temp[,2]),] | |
############################################################################################################# | |
##########Below randomly allocates a discharge value from the aggregated flow data, between 8am and 5pm.##### | |
############################################################################################################# | |
###This sets how many realisations you want this can be changed to anything you want... | |
simulations<-100 | |
###This creates a matrix for the randomly sampled flow | |
new.data<-matrix(ncol=simulations,nrow=length(midnight.wq[,1])) | |
###Create an object for the dates that the sampled flow represents. | |
dates<-matrix(ncol=simulations,nrow=length(midnight.wq[,1])) | |
##Cycle through each of the midnight samples | |
for(i in 1:length(midnight.wq[,1])){ | |
#Check to see if flow is recorded on this day and only continue if it is. | |
if(length(flow.3hr[(flow.3hr[,1]==as.POSIXct(as.Date(midnight.wq[i,1]))),1])==1){ | |
#Get the row number in the larger flow dataset, for midnight. | |
day.id<-as.numeric(rownames(flow.3hr[(flow.3hr[,1]==as.POSIXct(as.Date(midnight.wq[i,1]))),])) | |
#Random sample which hourly aggregated block to use, do this for the length of simulations. | |
#|1|2,2,2|3,3,3|4,4| = |8am|9am,10am,11am|12pm,1pm,2pm|3pm,4pm| - therefore each hour has equal probabilty, but the blocks arent of equal probablity. | |
sampled<-sample(c(1,2,2,2,3,3,3,4,4),simulations,replace=T) | |
#Now cycle through the cols of the current midnight sample (i) and sample based on the sampled value for this col. | |
for(j in 1:simulations){ | |
###Use a switch statement. There nice and quick and the one slected is based on the sampled value of this col. | |
#First switch statment gets the randomly allocated flow value. | |
switch(sampled[j], | |
new.data[i,j]<-flow.3hr[(day.id+2),2], | |
new.data[i,j]<-flow.3hr[(day.id+3),2], | |
new.data[i,j]<-flow.3hr[(day.id+4),2], | |
new.data[i,j]<-flow.3hr[(day.id+5),2] | |
) | |
#Second switch statement gets the time of the randomly allocated flow. | |
switch(sampled[j], | |
dates[i,j]<-paste(flow.3hr[(day.id+2),1]), | |
dates[i,j]<-paste(flow.3hr[(day.id+3),1]), | |
dates[i,j]<-paste(flow.3hr[(day.id+4),1]), | |
dates[i,j]<-paste(flow.3hr[(day.id+5),1]) | |
) | |
} | |
} | |
} | |
############Now merge the newly randomly created value with the good values. | |
################################ | |
### Put it all back together ### | |
################################ | |
##Create a list for to store the simulated data for each realisation | |
all.output<-list() | |
for(i in 1:simulations){ | |
#Create the dataset for this realisation | |
sim.<-data.frame(date=dates[,i],discharge=new.data[,i],tp=midnight.wq$Phosphorus.Total..mg.L.) | |
#Get rid of the missing values | |
sim.<-sim.[!is.na(sim.[,2]),] | |
#join this realisation with the unchanged observations | |
together<-rbind(other.wq,sim.) | |
#Now get the dates in order. | |
together<-together[order(together[,1]),] | |
together$numericdist<-as.numeric(difftime(together[,1],together[1,1],units="days")) | |
together$bush.fire<-as.numeric(together[,1]>as.POSIXct("2002-01-01 00:00:00")) | |
all.output[[i]]<-together | |
} | |
########################################################## | |
########################################################## | |
################### MODEL FITTING ######################## | |
########################################################## | |
########################################################## | |
#####First trend is just flow | |
ini.<-matrix(NA,ncol=2,nrow=7) | |
ini.[,1]=c(0.1,0.1,0.1,0.1,0.1,0.1,0.1) | |
ini.[,2]=c(2,3,4,5,6,7,8) | |
likfitmods<-list() | |
likfitmods[["flowspatial"]]<-vector() | |
likfitmods[["firespatial"]]<-vector() | |
likfitmods[["flowfirespatial"]]<-vector() | |
Geo<-list() | |
library(geoR) | |
for(i in 1:simulations){#testing | |
#Get lambda values for the boxcox transformtion | |
#lambda<-boxcox.fit(all.output[[i]]$tp)$lambda | |
#flowlambda<-boxcox.fit(all.output[[i]]$discharge)$lambda | |
##Using a log transform change lambda to 0 for log transform. | |
lambda=0 | |
flowlambda=0 | |
#Create a data frame for creating geoR object | |
tempData<-data.frame(x=all.output[[i]]$numericdist,y=1,tp=all.output[[i]]$tp,flw=BCtransform(all.output[[i]]$discharge,flowlambda)$data,bfire=as.factor(all.output[[i]]$bush.fire)) | |
#Get rid of any duplicates using the jitter function. Note that the latest geoR package is required. | |
tempData[,1:2]<-jitterDupCoords(tempData[,1:2],max=0.01) | |
#Create a geodata object of the data. | |
Geo[[i]]<-as.geodata(tempData,coords.col=1:2,data.col=3,covar.col=4:5) | |
###Plot a variogram of the data, using lambda to transform the data | |
#plot(variog(tempGeo,lambda=lambda,max.dist=30,trend=~fre)) | |
###Use likfit to optimise the variogram model. | |
############################## | |
############################## | |
####### The models ########### | |
############################## | |
############################## | |
##The flow model | |
likfitmods[["flow"]][[i]]<-likfit(Geo[[i]],ini=ini.,fix.nugget = F,fix.lambda=T, lik.method = "REML",lambda=lambda,trend=trend.spatial(~flw,Geo[[i]]),cov.model="matern") | |
#Check to see if the model has a temporal trend. | |
if(summary(likfitmods[["flow"]][[i]])$likelihood$AIC<summary(likfitmods[["flow"]][[i]])$ns.likelihood$AIC)likfitmods[["flowspatial"]][i]=1 else likfitmods[["flowspatial"]][i]<-0 | |
##The fire model | |
likfitmods[["fire"]][[i]]<-likfit(Geo[[i]],ini=ini.,fix.nugget = F,fix.lambda=T, lik.method = "REML",lambda=lambda,trend=trend.spatial(~flw+bfire,Geo[[i]]),cov.model="matern") | |
#Check to see if the model has a temporal trend. | |
if(summary(likfitmods[["fire"]][[i]])$likelihood$AIC<summary(likfitmods[["fire"]][[i]])$ns.likelihood$AIC)likfitmods[["firespatial"]][i]=1 else likfitmods[["firespatial"]][i]<-0 | |
##The flow and fire model | |
likfitmods[["flowfire"]][[i]]<-likfit(Geo[[i]],ini=ini.,fix.nugget = F,fix.lambda=T, lik.method = "REML",lambda=lambda,trend=trend.spatial(~flw*bfire,Geo[[i]]),cov.model="matern") | |
#Check to see if the model has a temporal trend. | |
if(summary(likfitmods[["flowfire"]][[i]])$likelihood$AIC<summary(likfitmods[["flowfire"]][[i]])$ns.likelihood$AIC)likfitmods[["flowfirespatial"]][i]=1 else likfitmods[["flowfirespatial"]][i]<-0 | |
cat("We are now up to", i,"\n") | |
# | |
print(i) | |
} | |
###Save the output | |
save(likfitmods,file="likfitmods_Burke_TP.Rdata") | |
save(all.output,file="simulated_data_Burke_TP.Rdata") | |
save(Geo,file="JitteredGeoObjects_Burke_TP.Rdata") | |
###Get the P values and determine if the interaction is significant. | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment