Last active
November 25, 2015 22:09
-
-
Save troyhill/fc0616464db05fa8e013 to your computer and use it in GitHub Desktop.
Code parses monthly National Climatic Data Center Local Climatological Data reports (downloaded as .txt files)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Script to parse NCDC LCD comma-separated .txt files | |
parseMonthlyLCD <- function(txt_list) { | |
# function takes a vector of comma separated filenames for NCDC LCD data (https://www.ncdc.noaa.gov/IPS/lcd/lcd.html) | |
# returns a summary of monthly meteorological data | |
# this seems to work at least back to 2009, but NCDC formatting is inconsistent, so use this code with caution | |
# usage example: | |
# download .txt files from https://www.ncdc.noaa.gov/IPS/lcd/lcd.html | |
# input_files <- list.files(path = "C:/RDATA/NCDC/NewOrleans/2013", full.names = TRUE, pattern = ".txt") # find .txts in a directory | |
# parseMonthlyLCD(input_files) | |
for (i in 1:length(txt_list)) { | |
fullData <- suppressWarnings(readLines(txt_list[i])) | |
# line containing summary data varies depending on the number of days in the month | |
for (j in 27:37) { | |
a <- as.numeric(strsplit(as.character(fullData[j]), ",")[[1]][1]) | |
b <- as.numeric(strsplit(as.character(fullData[j - 1]), ",")[[1]][1]) | |
if ((a != (b + 1)) | (is.na(a))) break | |
} | |
tempDat <- data.frame(t(as.numeric(strsplit(as.character(fullData[j]), ",")[[1]]))) | |
if (ncol(tempDat) == 20) { | |
aggData <- tempDat[c(2:4, 6:9, 13:20)] | |
} else if (ncol(tempDat) > 15) { | |
length_val <- ncol(tempDat) | |
aggData <- tempDat[c(2:4, 6:9, (length_val-7):length_val)] | |
} else if (ncol(tempDat) == 15) { | |
aggData <- tempDat | |
} else print(paste0("error: length of line ", j, " is ", ncol(tempDat), | |
". filename: ", txt_list[i])) | |
names(aggData) <- c("meanDailyMaxTemp", "meanDailyMinTemp", "meanDailyMeanTemp", "meanDailyDewPt", "meanDailyWetBulb", "meanDailyHDD", | |
"meanDailyCDD", "totalSnow_wtrEquiv", "totalSnow", "totalPrecip", "meanDailyinHg_stn", "meanDailyinHg_seaLevel", | |
"meanResultantSpeed", "meanResultantDirection", "meanDailyWindSpeed") | |
# extract station identifier, located in parentheses | |
aggData$station <- regmatches(as.character(fullData[2]), gregexpr("(?<=\\().*?(?=\\))", as.character(fullData[2]), perl=T))[[1]] | |
stnDeets <- strsplit(as.character(fullData[1]), " ")[[1]] | |
aggData$location <- substr(stnDeets[length(stnDeets)], 1, nchar(stnDeets[length(stnDeets)]) - 1) | |
aggData$moYr <- as.Date(paste0(substr(stnDeets[1], 2, nchar(stnDeets[1])), "-01"), format = "%B %Y-%d") | |
if (i > 1) { | |
output <- rbind(output, aggData) | |
} else { | |
output <- aggData | |
} | |
} | |
# convert to metric | |
output[, 1:5] <- (output[, 1:5] - 32) * 5/9 # convert degrees F to Celsius | |
output[, 8:10] <- (output[, 8:10] * 2.54) # convert inches to centimeters | |
output[, c(13, 15)] <- (output[, c(13, 15)] * 0.44704) # convert mph to meters per sec | |
output | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment