johnDorian · September 21, 2010 06:22
diff --git a/midnight_burke.R b/midnight_burke.R
 setwd("~/Documents/code/francesca")

 library(TSAgg)

 quality<-read.csv("Burke_worked.csv",header=T)

 head(quality)

 q2<-timeSeries(quality$date,"%d/%m/%Y %H:%M")

 q2<-data.frame(q2,quality[,3:7])

 temp<-q2[q2[,"hour"]==0,]

 #excludes midnight autosampler samples

 midnight.wq<-temp[temp[,"Sample.Type"]!="A",]





 ###Sample 100 samples from numbers 1:900 (every hour is 100 values), and each aggregation is 300 values in length.

 #subsampled<-sample(900,100,replace=T)

 ###After 8am and before 9 am

 #a<-length(subsampled[subsampled<100])

 ###After 9am and before 12 pm

 #b<-length(subsampled[subsampled<400])-a

 ###After 12pm and before 3pm

 #c<-length(subsampled[subsampled<700])-b-a

 ###After 3pm and before 5pm

 #d<-length(subsampled)-c-b-a

 ##a;b;c;d



 ##Read the flow file, skip header lines, and not include quality of flow sample column.
 flow<-read.table("2122052.TXT",header=T,sep=",")[,-3]

 ###Fix the names up

 names(flow)<-c("date","discharge")

 flow<-flow[!is.na(flow$discharge),]

 head(flow)





 ###Install the timeSeries aggregation package

 flow<-timeSeries(flow$date,"%d/%m/%Y %H:%M",data=flow$discharge)

 ###timesSeries data frame must start at midnight so the merge with the quality data works.

 flow<-flow[-c(1:8),]

 head(flow)

 ##The aggregation can take some time for long datasets like  this one.

 flow.3hr<-hoursAgg(flow,sum,3)

 head(flow.3hr)

 names(flow.3hr)<-c("date","discharge")

 back.up<-flow.3hr

 tail(flow.3hr)

 ###Now use the merge function to join  the WQ and the flow data





 #day.id<-as.numeric(rownames(flow.3hr[(flow.3hr[,1]==midnight.wq[1,1]),]))

 #

 #a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a))

 #b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b))

 #c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c))

 #d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d))

 #sampled.day<-rbind(a.data,b.data,c.data,d.data)

 #hist(sampled.day$flow)

 #day.sum<-data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day$flow),min=min(sampled.day$flow),max=max(sampled.day$flow),median=median(sampled.day$flow),sd=sd(sampled.day$flow),var=var(sampled.day$flow))







 ###Create a list to save the sampled data into

 sampled.day<-list()

 ###Create an object to put the

 day.sum<-NULL

 ###How many samples for each day?

 samples<-100

 ###Find out the days without flow data

 without.flow<-NULL

 for(i in 1:length(midnight.wq[,1])){

 	###Create a subsample dataset

 	subsampled<-sample(900,samples,replace=T)

 	##After 8am and before 9 am

 	a<-length(subsampled[subsampled<100])

 	##After 9am and before 12 pm

 	b<-length(subsampled[subsampled<400])-a

 	##After 12pm and before 3pm

 	c<-length(subsampled[subsampled<700])-b-a

 	##After 3pm and before 5pm

 	d<-length(subsampled)-c-b-a

 	###Make sure there is flow data for this day.

  if(length( flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),1])==1){

  	####Get the row number of the water quality day in question from the larger flow dataset.

 	 day.id<-as.numeric(rownames(flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),]))



 	###Create a dataset of the day based on the random samples of each time period.

 	 a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a))

 	 b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b))

 	 c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c))

 	 d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d))

 	 sampled.day[[i]]<-rbind(a.data,b.data,c.data,d.data)



 	 ###Add this day summary to the complete output of all the days in question

 	 day.sum<-rbind(day.sum,data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day[[i]]$flow),min=min(sampled.day[[i]]$flow),max=max(sampled.day[[i]]$flow),median=median(sampled.day[[i]]$flow),sd=sd(sampled.day[[i]]$flow),var=var(sampled.day[[i]]$flow)))

 	 } else  {

 	   without.flow<-rbind(without.flow,paste(as.Date(midnight.wq[i,1])))

  }

 }





 write.csv(day.sum,"Midnight_flow_Burk.csv")
	setwd("~/Documents/code/francesca")

	library(TSAgg)

	quality<-read.csv("Burke_worked.csv",header=T)

	head(quality)

	q2<-timeSeries(quality$date,"%d/%m/%Y %H:%M")

	q2<-data.frame(q2,quality[,3:7])

	temp<-q2[q2[,"hour"]==0,]

	#excludes midnight autosampler samples

	midnight.wq<-temp[temp[,"Sample.Type"]!="A",]





	###Sample 100 samples from numbers 1:900 (every hour is 100 values), and each aggregation is 300 values in length.

	#subsampled<-sample(900,100,replace=T)

	###After 8am and before 9 am

	#a<-length(subsampled[subsampled<100])

	###After 9am and before 12 pm

	#b<-length(subsampled[subsampled<400])-a

	###After 12pm and before 3pm

	#c<-length(subsampled[subsampled<700])-b-a

	###After 3pm and before 5pm

	#d<-length(subsampled)-c-b-a

	##a;b;c;d



	##Read the flow file, skip header lines, and not include quality of flow sample column.
	flow<-read.table("2122052.TXT",header=T,sep=",")[,-3]

	###Fix the names up

	names(flow)<-c("date","discharge")

	flow<-flow[!is.na(flow$discharge),]

	head(flow)





	###Install the timeSeries aggregation package

	flow<-timeSeries(flow$date,"%d/%m/%Y %H:%M",data=flow$discharge)

	###timesSeries data frame must start at midnight so the merge with the quality data works.

	flow<-flow[-c(1:8),]

	head(flow)

	##The aggregation can take some time for long datasets like this one.

	flow.3hr<-hoursAgg(flow,sum,3)

	head(flow.3hr)

	names(flow.3hr)<-c("date","discharge")

	back.up<-flow.3hr

	tail(flow.3hr)

	###Now use the merge function to join the WQ and the flow data





	#day.id<-as.numeric(rownames(flow.3hr[(flow.3hr[,1]==midnight.wq[1,1]),]))

	#

	#a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a))

	#b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b))

	#c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c))

	#d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d))

	#sampled.day<-rbind(a.data,b.data,c.data,d.data)

	#hist(sampled.day$flow)

	#day.sum<-data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day$flow),min=min(sampled.day$flow),max=max(sampled.day$flow),median=median(sampled.day$flow),sd=sd(sampled.day$flow),var=var(sampled.day$flow))







	###Create a list to save the sampled data into

	sampled.day<-list()

	###Create an object to put the

	day.sum<-NULL

	###How many samples for each day?

	samples<-100

	###Find out the days without flow data

	without.flow<-NULL

	for(i in 1:length(midnight.wq[,1])){

	###Create a subsample dataset

	subsampled<-sample(900,samples,replace=T)

	##After 8am and before 9 am

	a<-length(subsampled[subsampled<100])

	##After 9am and before 12 pm

	b<-length(subsampled[subsampled<400])-a

	##After 12pm and before 3pm

	c<-length(subsampled[subsampled<700])-b-a

	##After 3pm and before 5pm

	d<-length(subsampled)-c-b-a

	###Make sure there is flow data for this day.

	if(length( flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),1])==1){

	####Get the row number of the water quality day in question from the larger flow dataset.

	day.id<-as.numeric(rownames(flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),]))



	###Create a dataset of the day based on the random samples of each time period.

	a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a))

	b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b))

	c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c))

	d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d))

	sampled.day[[i]]<-rbind(a.data,b.data,c.data,d.data)



	###Add this day summary to the complete output of all the days in question

	day.sum<-rbind(day.sum,data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day[[i]]$flow),min=min(sampled.day[[i]]$flow),max=max(sampled.day[[i]]$flow),median=median(sampled.day[[i]]$flow),sd=sd(sampled.day[[i]]$flow),var=var(sampled.day[[i]]$flow)))

	} else {

	without.flow<-rbind(without.flow,paste(as.Date(midnight.wq[i,1])))

	}

	}





	write.csv(day.sum,"Midnight_flow_Burk.csv")