Last active
October 11, 2018 16:22
-
-
Save troyhill/cf6e030d8d3575d482928748c6daeb8b to your computer and use it in GitHub Desktop.
Script to reformat water level predictions from Thai Navy monitoring stations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Script to reformat water level predictions from Thai Navy monitoring stations
reformatData <- function(filename, timezone = "GMT") {
  ### Convert a Thai Navy water level prediction table (one block per month,
  ### one row per day, one column per hour) into a long, time-ordered data frame.
  ###
  ### filename = location of tab-delimited text file with Thai Navy water level predictions
  ### timezone = time zone name passed as `tz` to as.POSIXct(). The clock times in
  ###            the file are labelled with this zone, not converted to it. The
  ###            default "GMT" avoids daylight savings time issues; any name listed
  ###            by OlsonNames() can be used instead (e.g. "Asia/Bangkok" --
  ###            presumably the zone the predictions are published in; confirm
  ###            against the data source before relying on it).
  ###
  ### returns (invisibly) a data frame with columns `datetime` (POSIXct) and
  ### `meters.MLLW` (numeric), sorted by datetime, with row names 1..n
  ###
  ### usage example
  # UdayDat <- "C:/RDATA/Thailand/data/data_waterLevel2018.txt"
  # dat1 <- reformatData(filename = UdayDat)
  # tail(dat1)

  wtr <- read.delim(filename, stringsAsFactors = FALSE)

  ### columns 1 (day of month) and 3:26 (hours 0-23) are read by position
  if (ncol(wtr) < 26) {
    stop("expected at least 26 columns in ", filename, " but found ", ncol(wtr),
         call. = FALSE)
  }

  ### identify rows where months begin using "DATE" located above days.
  ### Each block's data runs from 2 rows below its "DATE" row to 8 rows above
  ### the next "DATE" row; the trailing `nrow(wtr) + 8` sentinel therefore makes
  ### the final block end on the last row of the file.
  month.start <- c(grep(pattern = "DATE", x = wtr$X), nrow(wtr) + 8)

  ### month labels ("<English month name> <year>") live in column X121
  month.pattern <- paste(month.name, collapse = "|")
  month.name.col <- wtr$X121
  months.in.dataset <- month.name.col[grep(pattern = month.pattern, x = month.name.col)]

  if (length(months.in.dataset) == 0) {
    stop("no month labels found in column 'X121' of ", filename, call. = FALSE)
  }
  if (length(month.start) <= length(months.in.dataset)) {
    stop("found ", length(months.in.dataset), " month labels but only ",
         length(month.start) - 1, " 'DATE' rows in column 'X' of ", filename,
         call. = FALSE)
  }

  datetime.format <- "%m %Y %d %H"

  ### process month by month; each element is a long (datetime, value) frame
  monthly <- lapply(seq_along(months.in.dataset), function(j) {
    monthName <- as.character(months.in.dataset[j])

    sub1 <- wtr[(month.start[j] + 2):(month.start[j + 1] - 8), c(1, 3:26)]
    ### NOTE: rows are filtered before numeric coercion, so cells holding
    ### non-numeric text survive here and become NA below
    sub1 <- sub1[complete.cases(sub1), ]
    names(sub1) <- c("day", paste0("h", 0:23))
    sub1[] <- lapply(sub1, function(column) as.numeric(as.character(column)))

    n.days <- nrow(sub1)
    if (n.days == 0) {
      ### nothing usable in this block; contribute an empty, correctly typed frame
      return(data.frame(
        datetime = as.POSIXct(character(0), format = datetime.format, tz = timezone),
        meters.MLLW = numeric(0)
      ))
    }

    ### swap the English month name for its number so that parsing does not
    ### depend on the session's LC_TIME locale (which "%B" would)
    month.num <- match(regmatches(monthName, regexpr(month.pattern, monthName)),
                       month.name)
    month.label <- sub(month.pattern, sprintf("%02d", month.num), monthName)

    ### stack the 24 hourly columns: all days for hour 0, then hour 1, ...
    day <- rep(sub1$day, times = 24)
    hour <- rep(0:23, each = n.days)

    data.frame(
      datetime = as.POSIXct(paste(month.label, day, hour),
                            format = datetime.format, tz = timezone),
      meters.MLLW = unlist(sub1[, -1], use.names = FALSE)
    )
  })

  outDat <- do.call(rbind, monthly)
  outDat <- outDat[order(outDat$datetime), ]
  row.names(outDat) <- NULL
  invisible(outDat)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment