Quantisan · February 12, 2012 11:43
diff --git a/wunderground.R b/wunderground.R
 # Scrapes wunderground weather record
 #
 # Based on http://casoilresource.lawr.ucdavis.edu/drupal/node/991

 #
 # Usage:
 # fetch.temperature('ILONDONL8', '2012-01-01', '2012-01-30')
 #
 fetch.temperature <- function(station, start.date, end.date) {
  date.range <- seq.Date(from=as.Date(start.date), to=as.Date(end.date), by='1 day')

  # pre-allocate list
  C <- vector(mode='numeric', length=length(date.range))

  # loop over dates, and fetch data
  for(i in seq_along(date.range)) {
    print(date.range[i])
    C[i] <- fetch.temperature.one(station, date.range[i])
  }
  return(data.frame(C, row.names=date.range))
 }

 fetch.temperature.one <- function(station, date) {
  w <- fetch.weather.raw(station, date)
  return(median(w$TemperatureC))
 }

 # Work in progress
 # Usage:
 # fetch.airport.weather('EGLL', '2012-02-01')
 #
 fetch.airport.weather <- function(station, date) {
  base_url <- 'http://www.wunderground.com/history/airport'
  
  date <- as.Date(date)
  # parse date
  m <- as.integer(format(date, '%m'))
  d <- as.integer(format(date, '%d'))
  y <- format(date, '%Y')
  
  # compose final url
  final_url <- paste(base_url,
                     '/', station,
                     '/', y,
                     '/', m,
                     '/', d, 
                     '/', 'DailyHistory.html?&format=1',
                     sep='')
  print(final_url)
  
  # reading in as raw lines from the web server
  # contains <br> tags on every other line
  u <- url(final_url)
  the_data <- readLines(u)
  close(u)
  
  # only keep records with more than 3 rows of data
  if(length(the_data) > 3 )
        {
        # remove the first and last lines
        the_data <- the_data[-c(1, length(the_data))]
        
        # remove odd numbers starting from 3 --> end
       # the_data <- the_data[-seq(3, length(the_data), by=2)]
        
        # extract header and cleanup
        the_header <- the_data[1]
        the_header <- make.names(strsplit(the_header, ',')[[1]])
        
        # convert to CSV, without header
        tC <- textConnection(paste(the_data, collapse='\n'))
        the_data <- read.csv(tC, as.is=TRUE, row.names=NULL, header=FALSE, skip=1)
        close(tC)
        
        # remove the last column, created by trailing comma
        the_data <- the_data[, -ncol(the_data)]
        
        # assign column names
        names(the_data) <- the_header
        
        # convert Time column into properly encoded date time
        the_data$Time <- as.POSIXct(strptime(the_data$Time, format='%Y-%m-%d %H:%M:%S'))
        
        # remove UTC column
        the_data$DateUTC.br... <- NULL
        
        # sort and fix rownames
        the_data <- the_data[order(the_data$Time), ]
        row.names(the_data) <- 1:nrow(the_data)
        
        # done
        return(the_data)
        }
  }

 #
 # Usage:
 # fetch.weather('IKENSING2', '2012-02-01')
 #
 fetch.weather.raw <- function(station, date) {
  base_url <- 'http://www.wunderground.com/weatherstation/WXDailyHistory.asp?'
  
  date <- as.Date(date)
  # parse date
  m <- as.integer(format(date, '%m'))
  d <- as.integer(format(date, '%d'))
  y <- format(date, '%Y')
  
  # compose final url
  final_url <- paste(base_url,
  'ID=', station,
  '&month=', m,
  '&day=', d, 
  '&year=', y,
  '&format=1', sep='')
  
  # reading in as raw lines from the web server
  # contains <br> tags on every other line
  u <- url(final_url)
  the_data <- readLines(u)
  close(u)
  
  # only keep records with more than 5 rows of data
  if(length(the_data) > 5 )
        {
        # remove the first and last lines
        the_data <- the_data[-c(1, length(the_data))]
        
        # remove odd numbers starting from 3 --> end
        the_data <- the_data[-seq(3, length(the_data), by=2)]
        
        # extract header and cleanup
        the_header <- the_data[1]
        the_header <- make.names(strsplit(the_header, ',')[[1]])
        
        # convert to CSV, without header
        tC <- textConnection(paste(the_data, collapse='\n'))
        the_data <- read.csv(tC, as.is=TRUE, row.names=NULL, header=FALSE, skip=1)
        close(tC)
        
        # remove the last column, created by trailing comma
        the_data <- the_data[, -ncol(the_data)]
        
        # assign column names
        names(the_data) <- the_header
        
        # convert Time column into properly encoded date time
        the_data$Time <- as.POSIXct(strptime(the_data$Time, format='%Y-%m-%d %H:%M:%S'))
        
        # remove UTC and software type columns
        the_data$DateUTC.br. <- NULL
        the_data$SoftwareType <- NULL
        
        # sort and fix rownames
        the_data <- the_data[order(the_data$Time), ]
        row.names(the_data) <- 1:nrow(the_data)
        
        # done
        return(the_data)
        }
  }
	# Scrapes wunderground weather record
	#
	# Based on http://casoilresource.lawr.ucdavis.edu/drupal/node/991

	#
	# Usage:
	# fetch.temperature('ILONDONL8', '2012-01-01', '2012-01-30')
	#
	fetch.temperature <- function(station, start.date, end.date) {
	date.range <- seq.Date(from=as.Date(start.date), to=as.Date(end.date), by='1 day')

	# pre-allocate list
	C <- vector(mode='numeric', length=length(date.range))

	# loop over dates, and fetch data
	for(i in seq_along(date.range)) {
	print(date.range[i])
	C[i] <- fetch.temperature.one(station, date.range[i])
	}
	return(data.frame(C, row.names=date.range))
	}

	fetch.temperature.one <- function(station, date) {
	w <- fetch.weather.raw(station, date)
	return(median(w$TemperatureC))
	}

	# Work in progress
	# Usage:
	# fetch.airport.weather('EGLL', '2012-02-01')
	#
	fetch.airport.weather <- function(station, date) {
	base_url <- 'http://www.wunderground.com/history/airport'

	date <- as.Date(date)
	# parse date
	m <- as.integer(format(date, '%m'))
	d <- as.integer(format(date, '%d'))
	y <- format(date, '%Y')

	# compose final url
	final_url <- paste(base_url,
	'/', station,
	'/', y,
	'/', m,
	'/', d,
	'/', 'DailyHistory.html?&format=1',
	sep='')
	print(final_url)

	# reading in as raw lines from the web server
	# contains <br> tags on every other line
	u <- url(final_url)
	the_data <- readLines(u)
	close(u)

	# only keep records with more than 3 rows of data
	if(length(the_data) > 3 )
	{
	# remove the first and last lines
	the_data <- the_data[-c(1, length(the_data))]

	# remove odd numbers starting from 3 --> end
	# the_data <- the_data[-seq(3, length(the_data), by=2)]

	# extract header and cleanup
	the_header <- the_data[1]
	the_header <- make.names(strsplit(the_header, ',')[[1]])

	# convert to CSV, without header
	tC <- textConnection(paste(the_data, collapse='\n'))
	the_data <- read.csv(tC, as.is=TRUE, row.names=NULL, header=FALSE, skip=1)
	close(tC)

	# remove the last column, created by trailing comma
	the_data <- the_data[, -ncol(the_data)]

	# assign column names
	names(the_data) <- the_header

	# convert Time column into properly encoded date time
	the_data$Time <- as.POSIXct(strptime(the_data$Time, format='%Y-%m-%d %H:%M:%S'))

	# remove UTC column
	the_data$DateUTC.br... <- NULL

	# sort and fix rownames
	the_data <- the_data[order(the_data$Time), ]
	row.names(the_data) <- 1:nrow(the_data)

	# done
	return(the_data)
	}
	}

	#
	# Usage:
	# fetch.weather('IKENSING2', '2012-02-01')
	#
	fetch.weather.raw <- function(station, date) {
	base_url <- 'http://www.wunderground.com/weatherstation/WXDailyHistory.asp?'

	date <- as.Date(date)
	# parse date
	m <- as.integer(format(date, '%m'))
	d <- as.integer(format(date, '%d'))
	y <- format(date, '%Y')

	# compose final url
	final_url <- paste(base_url,
	'ID=', station,
	'&month=', m,
	'&day=', d,
	'&year=', y,
	'&format=1', sep='')

	# reading in as raw lines from the web server
	# contains <br> tags on every other line
	u <- url(final_url)
	the_data <- readLines(u)
	close(u)

	# only keep records with more than 5 rows of data
	if(length(the_data) > 5 )
	{
	# remove the first and last lines
	the_data <- the_data[-c(1, length(the_data))]

	# remove odd numbers starting from 3 --> end
	the_data <- the_data[-seq(3, length(the_data), by=2)]

	# extract header and cleanup
	the_header <- the_data[1]
	the_header <- make.names(strsplit(the_header, ',')[[1]])

	# convert to CSV, without header
	tC <- textConnection(paste(the_data, collapse='\n'))
	the_data <- read.csv(tC, as.is=TRUE, row.names=NULL, header=FALSE, skip=1)
	close(tC)

	# remove the last column, created by trailing comma
	the_data <- the_data[, -ncol(the_data)]

	# assign column names
	names(the_data) <- the_header

	# convert Time column into properly encoded date time
	the_data$Time <- as.POSIXct(strptime(the_data$Time, format='%Y-%m-%d %H:%M:%S'))

	# remove UTC and software type columns
	the_data$DateUTC.br. <- NULL
	the_data$SoftwareType <- NULL

	# sort and fix rownames
	the_data <- the_data[order(the_data$Time), ]
	row.names(the_data) <- 1:nrow(the_data)

	# done
	return(the_data)
	}
	}