jlehtoma · December 17, 2015 07:49
diff --git a/read_weather_data_table.R b/read_weather_data_table.R
 library(data.table)

 # Read every csv file in `folder` and return the rows that match the requested
 # hila elements, Julian-day range and (optionally) years.
 #
 # Args:
 #   folder:      Directory searched (non-recursively) for files ending in
 #                ".csv".
 #   hilas:       Vector of values looked up in the first ("hila") column.
 #   jdate.start: Inclusive lower bound for the "jdate" column.
 #   jdate.end:   Inclusive upper bound for the "jdate" column.
 #   years:       "All" (default) keeps every year; any other value is used as
 #                the set of values to keep from the "year" column.
 #
 # Returns:
 #   A data.frame holding the selected rows of all files stacked together. It
 #   has no rows and no columns when the folder contains no csv files.
 read.weather.data.table <- function(folder, hilas, jdate.start, jdate.end,
                                     years = "All") {

   # The first seven columns of every file are renamed to these names; any
   # further columns keep the names fread() gave them.
   col.names <- c("hila", "year", "x1", "x2", "temp", "rain", "jdate")

   # List all the csv files in the folder
   csv.files <- list.files(folder, "\\.csv$", full.names = TRUE)
   if (length(csv.files) == 0) {
     warning("No csv files found in folder: ", folder, call. = FALSE)
   }

   # Decide once, outside the per-file work, whether a year filter applies
   all.years <- is.character(years) && identical(tolower(years), "all")

   # Read and subset each file; the result is one data.table per file
   csv.files.data <- lapply(csv.files, function(csv.file) {
     dat <- fread(csv.file)

     # Fail with the offending file name instead of an opaque setnames() error
     if (ncol(dat) < length(col.names)) {
       stop("Expected at least ", length(col.names), " columns in ", csv.file,
            " but found ", ncol(dat), call. = FALSE)
     }
     setnames(dat, seq_along(col.names), col.names)

     # Subset only particular hila elements. nomatch = 0L drops requested hila
     # values that are absent from this file rather than adding an all-NA row
     # for each of them.
     setkey(dat, "hila", "jdate")
     dat <- dat[J(hilas), nomatch = 0L]

     # Subset only the years that were asked for
     if (!all.years) {
       dat <- dat[year %in% years]
     }

     # Subset only a particular range of Julian days (rows with a missing
     # jdate are dropped, as NA in a data.table row filter counts as FALSE)
     dat[jdate >= jdate.start & jdate <= jdate.end]
   })

   # Stack the per-file subsets into one table, matching columns by name. This
   # is all the data for the particular hila elements over all files.
   all.data <- rbindlist(csv.files.data, use.names = TRUE)
   data.frame(all.data)
 }

 # Define the folder from which to read and the file the summary is written to
 csv.folder <- "c:/temp"
 results.file <- file.path(csv.folder, "results.csv")

 # The results file lives in the very folder that is scanned for "*.csv"
 # input, so a copy left over from an earlier run has to be removed before
 # reading; otherwise it would be parsed as if it were weather data.
 if (file.exists(results.file)) {
   file.remove(results.file)
 }

 dat <- read.weather.data.table(csv.folder, 5, 100, 110)

 # Summarise for particular hila elements: one row per hila (sorted by hila)
 # with the mean of the temp and rain columns. data.table is used because it
 # is the package this script loads; ddply() belongs to plyr, which is never
 # attached here.
 if (nrow(dat) > 0) {
   day.summary <- as.data.frame(
     as.data.table(dat)[, list(mean.temp = mean(temp),
                               mean.perc = mean(rain)),
                        keyby = hila]
   )
 } else {
   # Nothing matched: keep the output columns so the file still has a header
   day.summary <- data.frame(hila = numeric(0),
                             mean.temp = numeric(0),
                             mean.perc = numeric(0))
 }

 # NOTE(review): write.table() defaults to space-separated output even though
 # the file is named ".csv" - confirm whether write.csv() was intended.
 write.table(day.summary, results.file)
	library(data.table)

	# Read every csv file in `folder` and return the rows that match the
	# requested hila elements, Julian-day range and (optionally) years.
	#
	# Args:
	#   folder:      Directory searched (non-recursively) for files ending in
	#                ".csv".
	#   hilas:       Vector of values looked up in the first ("hila") column.
	#   jdate.start: Inclusive lower bound for the "jdate" column.
	#   jdate.end:   Inclusive upper bound for the "jdate" column.
	#   years:       "All" (default) keeps every year; any other value is used
	#                as the set of values to keep from the "year" column.
	#
	# Returns:
	#   A data.frame holding the selected rows of all files stacked together.
	#   It has no rows and no columns when the folder contains no csv files.
	read.weather.data.table <- function(folder, hilas, jdate.start, jdate.end,
	                                    years = "All") {

	  # The first seven columns of every file are renamed to these names; any
	  # further columns keep the names fread() gave them.
	  col.names <- c("hila", "year", "x1", "x2", "temp", "rain", "jdate")

	  # List all the csv files in the folder
	  csv.files <- list.files(folder, "\\.csv$", full.names = TRUE)
	  if (length(csv.files) == 0) {
	    warning("No csv files found in folder: ", folder, call. = FALSE)
	  }

	  # Decide once, outside the per-file work, whether a year filter applies
	  all.years <- is.character(years) && identical(tolower(years), "all")

	  # Read and subset each file; the result is one data.table per file
	  csv.files.data <- lapply(csv.files, function(csv.file) {
	    dat <- fread(csv.file)

	    # Fail with the offending file name instead of an opaque setnames()
	    # error
	    if (ncol(dat) < length(col.names)) {
	      stop("Expected at least ", length(col.names), " columns in ",
	           csv.file, " but found ", ncol(dat), call. = FALSE)
	    }
	    setnames(dat, seq_along(col.names), col.names)

	    # Subset only particular hila elements. nomatch = 0L drops requested
	    # hila values that are absent from this file rather than adding an
	    # all-NA row for each of them.
	    setkey(dat, "hila", "jdate")
	    dat <- dat[J(hilas), nomatch = 0L]

	    # Subset only the years that were asked for
	    if (!all.years) {
	      dat <- dat[year %in% years]
	    }

	    # Subset only a particular range of Julian days (rows with a missing
	    # jdate are dropped, as NA in a data.table row filter counts as FALSE)
	    dat[jdate >= jdate.start & jdate <= jdate.end]
	  })

	  # Stack the per-file subsets into one table, matching columns by name.
	  # This is all the data for the particular hila elements over all files.
	  all.data <- rbindlist(csv.files.data, use.names = TRUE)
	  data.frame(all.data)
	}

	# Define the folder from which to read and the file the summary is written
	# to
	csv.folder <- "c:/temp"
	results.file <- file.path(csv.folder, "results.csv")

	# The results file lives in the very folder that is scanned for "*.csv"
	# input, so a copy left over from an earlier run has to be removed before
	# reading; otherwise it would be parsed as if it were weather data.
	if (file.exists(results.file)) {
	  file.remove(results.file)
	}

	dat <- read.weather.data.table(csv.folder, 5, 100, 110)

	# Summarise for particular hila elements: one row per hila (sorted by hila)
	# with the mean of the temp and rain columns. data.table is used because it
	# is the package this script loads; ddply() belongs to plyr, which is never
	# attached here.
	if (nrow(dat) > 0) {
	  day.summary <- as.data.frame(
	    as.data.table(dat)[, list(mean.temp = mean(temp),
	                              mean.perc = mean(rain)),
	                       keyby = hila]
	  )
	} else {
	  # Nothing matched: keep the output columns so the file still has a header
	  day.summary <- data.frame(hila = numeric(0),
	                            mean.temp = numeric(0),
	                            mean.perc = numeric(0))
	}

	# NOTE(review): write.table() defaults to space-separated output even
	# though the file is named ".csv" - confirm whether write.csv() was
	# intended.
	write.table(day.summary, results.file)