Last active
December 22, 2015 07:59
-
-
Save jamestrimble/6442229 to your computer and use it in GitHub Desktop.
An example of downloading time series from the UK Office for National Statistics directly to R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ONS datasets can be found at | |
# http://www.ons.gov.uk/ons/datasets-and-tables/index.html | |
# (Filter out reference tables at the right of the page) | |
library(reshape2) | |
library(plyr) | |
library(ggplot2) | |
# This is the time-consuming part. To get the maximum use out of your data, | |
# create a CSV file listing CDIDs (variable codes) and some metadata. | |
# It's obviously better to use a csv file rather than a long string in real life! | |
metadata <- read.csv( | |
text=paste('CDID,geo,sex,age,variable,seasonally_adj', | |
'LF3Y,England,All,16-64,Employment Rate,SA', | |
'LF4D,England,Female,16-64,Employment Rate,SA', | |
'YCLL,England,Male,16-64,Employment Rate,SA', | |
'LF5Z,Northern Ireland,All,16-64,Employment Rate,SA', | |
'LF62,Northern Ireland,Female,16-64,Employment Rate,SA', | |
'ZSFJ,Northern Ireland,Male,16-64,Employment Rate,SA', | |
'LF42,Scotland,All,16-64,Employment Rate,SA', | |
'LF4F,Scotland,Female,16-64,Employment Rate,SA', | |
'YCLN,Scotland,Male,16-64,Employment Rate,SA', | |
'LF24,UK,All,16-64,Employment Rate,SA', | |
'LF25,UK,Female,16-64,Employment Rate,SA', | |
'MGSV,UK,Male,16-64,Employment Rate,SA', | |
'LF3Z,Wales,All,16-64,Employment Rate,SA', | |
'LF4E,Wales,Female,16-64,Employment Rate,SA', | |
'YCLM,Wales,Male,16-64,Employment Rate,SA', | |
sep='\n')) | |
dataset <- 'lms' | |
uri <- 'http://www.ons.gov.uk/ons/datasets-and-tables/downloads/csv.csv' | |
uri <- paste(uri, | |
'?dataset=', | |
dataset, | |
'&cdid=', | |
paste(metadata$CDID, collapse='%2C'), | |
sep='') | |
data_string <- readLines(uri) | |
data_string <- paste(data_string, collapse='\n') | |
data_string <- sub('\n\n.*', '', data_string) # remove copyright notice and metadata | |
csv <- read.csv(text=data_string) | |
names(csv)[1] <- 'time' | |
# Just use the quarterly data | |
csv$time <- as.character(csv$time) | |
csv <- csv[nchar(csv$time)==7, ] | |
years <- as.numeric(substr(csv$time, 1, 4)) | |
quarters <- as.numeric(substr(csv$time, 7, 7)) | |
# Change quarters to fractional numbers (I'm not sure if this is the best way to use the data) | |
csv$time <- years + (quarters-1)/4 | |
# Convert table to long for for use with ggplot2 | |
data_long <- melt(csv, id.vars='time', variable.name='CDID', value.name='value') | |
data_long <- join(data_long, metadata) | |
data_long$value <- as.numeric(data_long$value) | |
# plot employment rate time series by country (colour) and sex (panel) | |
plot_data <- data_long[!is.na(data_long$value), ] | |
ggplot(plot_data, aes(x=time, y=value, colour=geo, group=geo)) + | |
facet_grid(sex ~ .) + | |
geom_line() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment