Skip to content

Instantly share code, notes, and snippets.

@troyhill
Last active November 25, 2015 22:09
Show Gist options
  • Save troyhill/fc0616464db05fa8e013 to your computer and use it in GitHub Desktop.
Save troyhill/fc0616464db05fa8e013 to your computer and use it in GitHub Desktop.
Code parses monthly National Climatic Data Center Local Climatological Data reports (downloaded as .txt files)
### Script to parse NCDC LCD comma-separated .txt files
parseMonthlyLCD <- function(txt_list) {
# function takes a vector of comma separated filenames for NCDC LCD data (https://www.ncdc.noaa.gov/IPS/lcd/lcd.html)
# returns a summary of monthly meteorological data
# this seems to work at least back to 2009, but NCDC formatting is inconsistent, so use this code with caution
# usage example:
# download .txt files from https://www.ncdc.noaa.gov/IPS/lcd/lcd.html
# input_files <- list.files(path = "C:/RDATA/NCDC/NewOrleans/2013", full.names = TRUE, pattern = ".txt") # find .txts in a directory
# parseMonthlyLCD(input_files)
for (i in 1:length(txt_list)) {
fullData <- suppressWarnings(readLines(txt_list[i]))
# line containing summary data varies depending on the number of days in the month
for (j in 27:37) {
a <- as.numeric(strsplit(as.character(fullData[j]), ",")[[1]][1])
b <- as.numeric(strsplit(as.character(fullData[j - 1]), ",")[[1]][1])
if ((a != (b + 1)) | (is.na(a))) break
}
tempDat <- data.frame(t(as.numeric(strsplit(as.character(fullData[j]), ",")[[1]])))
if (ncol(tempDat) == 20) {
aggData <- tempDat[c(2:4, 6:9, 13:20)]
} else if (ncol(tempDat) > 15) {
length_val <- ncol(tempDat)
aggData <- tempDat[c(2:4, 6:9, (length_val-7):length_val)]
} else if (ncol(tempDat) == 15) {
aggData <- tempDat
} else print(paste0("error: length of line ", j, " is ", ncol(tempDat),
". filename: ", txt_list[i]))
names(aggData) <- c("meanDailyMaxTemp", "meanDailyMinTemp", "meanDailyMeanTemp", "meanDailyDewPt", "meanDailyWetBulb", "meanDailyHDD",
"meanDailyCDD", "totalSnow_wtrEquiv", "totalSnow", "totalPrecip", "meanDailyinHg_stn", "meanDailyinHg_seaLevel",
"meanResultantSpeed", "meanResultantDirection", "meanDailyWindSpeed")
# extract station identifier, located in parentheses
aggData$station <- regmatches(as.character(fullData[2]), gregexpr("(?<=\\().*?(?=\\))", as.character(fullData[2]), perl=T))[[1]]
stnDeets <- strsplit(as.character(fullData[1]), " ")[[1]]
aggData$location <- substr(stnDeets[length(stnDeets)], 1, nchar(stnDeets[length(stnDeets)]) - 1)
aggData$moYr <- as.Date(paste0(substr(stnDeets[1], 2, nchar(stnDeets[1])), "-01"), format = "%B %Y-%d")
if (i > 1) {
output <- rbind(output, aggData)
} else {
output <- aggData
}
}
# convert to metric
output[, 1:5] <- (output[, 1:5] - 32) * 5/9 # convert degrees F to Celsius
output[, 8:10] <- (output[, 8:10] * 2.54) # convert inches to centimeters
output[, c(13, 15)] <- (output[, c(13, 15)] * 0.44704) # convert mph to meters per sec
output
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment