Created
September 21, 2010 06:22
-
-
Save johnDorian/589287 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
setwd("~/Documents/code/francesca") | |
library(TSAgg) | |
quality<-read.csv("Burke_worked.csv",header=T) | |
head(quality) | |
q2<-timeSeries(quality$date,"%d/%m/%Y %H:%M") | |
q2<-data.frame(q2,quality[,3:7]) | |
temp<-q2[q2[,"hour"]==0,] | |
#excludes midnight autosampler samples | |
midnight.wq<-temp[temp[,"Sample.Type"]!="A",] | |
###Sample 100 samples from numbers 1:900 (every hour is 100 values), and each aggregation is 300 values in length. | |
#subsampled<-sample(900,100,replace=T) | |
###After 8am and before 9 am | |
#a<-length(subsampled[subsampled<100]) | |
###After 9am and before 12 pm | |
#b<-length(subsampled[subsampled<400])-a | |
###After 12pm and before 3pm | |
#c<-length(subsampled[subsampled<700])-b-a | |
###After 3pm and before 5pm | |
#d<-length(subsampled)-c-b-a | |
##a;b;c;d | |
##Read the flow file, skip header lines, and not include quality of flow sample column. | |
flow<-read.table("2122052.TXT",header=T,sep=",")[,-3] | |
###Fix the names up | |
names(flow)<-c("date","discharge") | |
flow<-flow[!is.na(flow$discharge),] | |
head(flow) | |
###Install the timeSeries aggregation package | |
flow<-timeSeries(flow$date,"%d/%m/%Y %H:%M",data=flow$discharge) | |
###timesSeries data frame must start at midnight so the merge with the quality data works. | |
flow<-flow[-c(1:8),] | |
head(flow) | |
##The aggregation can take some time for long datasets like this one. | |
flow.3hr<-hoursAgg(flow,sum,3) | |
head(flow.3hr) | |
names(flow.3hr)<-c("date","discharge") | |
back.up<-flow.3hr | |
tail(flow.3hr) | |
###Now use the merge function to join the WQ and the flow data | |
#day.id<-as.numeric(rownames(flow.3hr[(flow.3hr[,1]==midnight.wq[1,1]),])) | |
# | |
#a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a)) | |
#b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b)) | |
#c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c)) | |
#d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d)) | |
#sampled.day<-rbind(a.data,b.data,c.data,d.data) | |
#hist(sampled.day$flow) | |
#day.sum<-data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day$flow),min=min(sampled.day$flow),max=max(sampled.day$flow),median=median(sampled.day$flow),sd=sd(sampled.day$flow),var=var(sampled.day$flow)) | |
###Create a list to save the sampled data into | |
sampled.day<-list() | |
###Create an object to put the | |
day.sum<-NULL | |
###How many samples for each day? | |
samples<-100 | |
###Find out the days without flow data | |
without.flow<-NULL | |
for(i in 1:length(midnight.wq[,1])){ | |
###Create a subsample dataset | |
subsampled<-sample(900,samples,replace=T) | |
##After 8am and before 9 am | |
a<-length(subsampled[subsampled<100]) | |
##After 9am and before 12 pm | |
b<-length(subsampled[subsampled<400])-a | |
##After 12pm and before 3pm | |
c<-length(subsampled[subsampled<700])-b-a | |
##After 3pm and before 5pm | |
d<-length(subsampled)-c-b-a | |
###Make sure there is flow data for this day. | |
if(length( flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),1])==1){ | |
####Get the row number of the water quality day in question from the larger flow dataset. | |
day.id<-as.numeric(rownames(flow.3hr[(as.POSIXlt(flow.3hr[,1],tz="UTC")==as.POSIXlt(as.Date(midnight.wq[i,1]))),])) | |
###Create a dataset of the day based on the random samples of each time period. | |
a.data<-data.frame(date=rep(flow.3hr[(day.id+2),1],a),flow=rep(flow.3hr[(day.id+2),2],a)) | |
b.data<-data.frame(date=rep(flow.3hr[(day.id+3),1],b),flow=rep(flow.3hr[(day.id+3),2],b)) | |
c.data<-data.frame(date=rep(flow.3hr[(day.id+4),1],c),flow=rep(flow.3hr[(day.id+4),2],c)) | |
d.data<-data.frame(date=rep(flow.3hr[(day.id+5),1],d),flow=rep(flow.3hr[(day.id+5),2],d)) | |
sampled.day[[i]]<-rbind(a.data,b.data,c.data,d.data) | |
###Add this day summary to the complete output of all the days in question | |
day.sum<-rbind(day.sum,data.frame(date=flow.3hr[day.id,1],mean=mean(sampled.day[[i]]$flow),min=min(sampled.day[[i]]$flow),max=max(sampled.day[[i]]$flow),median=median(sampled.day[[i]]$flow),sd=sd(sampled.day[[i]]$flow),var=var(sampled.day[[i]]$flow))) | |
} else { | |
without.flow<-rbind(without.flow,paste(as.Date(midnight.wq[i,1]))) | |
} | |
} | |
write.csv(day.sum,"Midnight_flow_Burk.csv") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment