Last active
January 17, 2017 19:03
-
-
Save jamestrimble/6917136 to your computer and use it in GitHub Desktop.
An R script to prepare the data for ukdataexplorer.com/census/
Based on work by Alex Singleton, http://www.alex-singleton.com/2011-census-open-atlas-project/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to create data for http://ukdataexplorer.com/census/ | |
# Author James Trimble, james.trimble at yahoo.co.uk | |
# | |
# This code is based on Alex Singleton's work: | |
# http://rpubs.com/alexsingleton/openatlas | |
# http://www.alex-singleton.com/2011-census-open-atlas-project/ | |
# | |
# The script takes around an hour to run. It could be made faster! | |
# The script requires a lookup table and shapefile from | |
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page | |
# The shapefile of wards neds to be converted to lat-long coordinates, | |
# for example using the four commented lines of code below. | |
# See https://stat.ethz.ch/pipermail/r-sig-geo/2013-February/017656.html | |
# ogrListLayers('WD_DEC_2011_EW_BGC.shp') | |
# map <- readOGR('WD_DEC_2011_EW_BGC.shp', "WD_DEC_2011_EW_BGC") | |
# map <- spTransform(map,CRS("+init=epsg:4326")) | |
# writeOGR(map, 'wards.shp', layer="wards", driver="ESRI Shapefile") | |
library(rgdal) | |
library(classInt) | |
library(plyr) | |
working_dir <- 'working_directory' | |
setwd(working_dir) | |
# Create a list of tables | |
table_list <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW", | |
"KS107EW", "KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA", | |
"KS208WA", "KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW", | |
"KS405EW", "KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW", | |
"KS606EW", "KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW", | |
"KS613EW") | |
table_list <- table_list[substr(table_list,6,7)!="WA"] | |
# Create a lookup-array of statistical populations (the denominator for percentages) | |
stat_populations <- c() | |
for (n in table_list) { | |
fn <- paste(n, 'META0.CSV', sep='') | |
d <- read.csv(fn) | |
stat_populations[n] <- as.character(d$StatisticalPopulation[1]) | |
} | |
table_list <- tolower(paste(table_list, "_2011_ward", sep = "")) | |
# Download Files | |
for (n in 1:length(table_list)) { | |
file <- as.character(table_list[n]) | |
temp <- tempfile(fileext = ".zip") | |
download.file(paste("http://www.nomisweb.co.uk/output/census/2011/", file, | |
".zip", sep = ""), temp) | |
unzip(temp, junkpaths = TRUE) | |
unlink(temp) | |
csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DATA.CSV", sep = "") | |
assign(file, read.csv(csv_file)) | |
} | |
Variable_Desc <- NULL | |
for (n in 1:length(table_list)) { | |
file <- as.character(table_list[n]) | |
csv_file <- paste(toupper(sub("_2011_ward", "", file)), "DESC0.CSV", sep = "") | |
temp <- read.csv(csv_file) | |
Variable_Desc <- rbind(Variable_Desc, temp) | |
remove(temp) | |
} | |
# Create lookup | |
Key_Statistics_2011 <- as.data.frame(ks101ew_2011_ward[, 1]) | |
colnames(Key_Statistics_2011) <- "GeographyCode" | |
# Merge Loop | |
for (n in 1:length(table_list)) { | |
Key_Statistics_2011 <- data.frame(Key_Statistics_2011, get(as.character(table_list[n]))[match(get(as.character(table_list[n]))[, | |
"GeographyCode"], get(as.character(table_list[n]))[, "GeographyCode"]), | |
]) | |
Key_Statistics_2011$GeographyCode.1 <- NULL | |
} | |
# Create key stats table List | |
a <- c("KS101EW", "KS102EW", "KS103EW", "KS104EW", "KS105EW", "KS106EW", "KS107EW", | |
"KS201EW", "KS202EW", "KS204EW", "KS205EW", "KS206EW", "KS207WA", "KS208WA", | |
"KS209EW", "KS301EW", "KS401EW", "KS402EW", "KS403EW", "KS404EW", "KS405EW", | |
"KS501EW", "KS601EW", "KS602EW", "KS603EW", "KS604EW", "KS605EW", "KS606EW", | |
"KS607EW", "KS608EW", "KS609EW", "KS610EW", "KS611EW", "KS612EW", "KS613EW") | |
table_lookup <- c("Usual resident population", "Age structure", "Marital and civil partnership status", | |
"Living arrangements", "Household composition", | |
"Adults not in employment and dependent children and persons with long-term health problem or disability for all households", | |
"Lone parent households with dependent children", "Ethnic group", "National identity", | |
"Country of birth", "Passports held", "Household language", "Welsh language skills", | |
"Welsh language profile", "Religion", "Health and provision of unpaid care", | |
"Dwellings, household spaces and accommodation type", "Tenure", "Rooms, bedrooms and central heating", | |
"Car or van availability", "Communal establishment residents", "Qualifications and students", | |
"Economic activity", "Economic activity - Males", "Economic activity - Females", | |
"Hours worked", "Industry", "Industry - Males", "Industry - Females", "Occupation", | |
"Occupation - Males", "Occupation - Females", "NS-SeC", "NS-SeC - Males", | |
"NS-SeC - Females") | |
names(table_lookup) <- a | |
# download from | |
# https://geoportal.statistics.gov.uk/geoportal/catalog/search/browse/browse.page | |
ward_reg_lkup <- read.csv('lookup/CTRY11_GOR10_CTY11_LAD11_WD11_UK_LU.csv') | |
create_files_for_region <- function(region_name, ward_list_for_region) { | |
short_region_name <- tolower(gsub(' ', '', region_name)) | |
dir_name <- paste('region_output/', short_region_name, sep='') | |
dir.create(dir_name) | |
data_dir_name <- paste(dir_name, '/data', sep='') | |
dir.create(data_dir_name) | |
data_for_region <- Key_Statistics_2011[Key_Statistics_2011$GeographyCode %in% ward_list_for_region,] | |
Variable_Desc <- Variable_Desc[Variable_Desc$ColumnVariableMeasurementUnit!="Count", ] | |
json_str <- '[\n' | |
sep_str <- '' | |
for (i in 1:nrow(Variable_Desc)) { | |
col_code <- as.character(Variable_Desc[i, "ColumnVariableCode"]) | |
if ( length(unique(as.numeric(as.character(data_for_region[, col_code])))) < 3 ) { | |
next # skip this variable if there are less than three different values | |
} | |
brks <- classIntervals(as.numeric(as.character(data_for_region[, col_code])), n = 6, style = "fisher", | |
unique = TRUE)$brks | |
var_json_str <- paste( | |
'', | |
"{\"code\": \"", | |
Variable_Desc[i, "ColumnVariableCode"], | |
"\",\n \"table\": \"", | |
table_lookup[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)], | |
"\",\n \"table_denominator\": \"", | |
stat_populations[substr(Variable_Desc[i, "ColumnVariableCode"],0,7)], | |
"\",\n \"unit\": \"", | |
Variable_Desc[i, "ColumnVariableMeasurementUnit"], | |
"\",\n \"desc\": \"", | |
Variable_Desc[i, "ColumnVariableDescription"], | |
"\",\n \"breaks\": [", | |
paste(brks, collapse=","), | |
"]\n}", | |
sep='') | |
json_str <- paste(json_str, sep_str, var_json_str, sep='') | |
sep_str <- ',\n' | |
data_to_save <- data_for_region[, c(1, which(names(data_for_region)==col_code))] | |
data_json <- '{\n' | |
sep_str2 <- '' | |
for (j in 1:nrow(data_to_save)) { | |
data_val <- data_to_save[j, 2] | |
if (data_val != '..') { | |
data_json <- paste( | |
data_json, | |
sep_str2, | |
' "', | |
data_to_save[j, 1], | |
'": ', | |
data_val, | |
'\n', | |
sep = '') | |
sep_str2 <- ',' | |
} | |
} | |
data_json = paste(data_json, '}', sep='') | |
fileConn<-file(paste(data_dir_name, '/', col_code, '.json', sep='')) | |
writeLines(data_json, fileConn) | |
close(fileConn) | |
} | |
json_str <- paste(json_str, ']', sep='') | |
fileConn<-file(paste(data_dir_name, '/', 'info.json', sep='')) | |
writeLines(json_str, fileConn) | |
close(fileConn) | |
map <- map[map@data$WD11CD %in% ward_list_for_region, ] | |
writeOGR(map, paste(data_dir_name, "/wards.json", sep=''), layer=paste("wards1", short_region_name, sep=''), driver="GeoJSON") | |
} | |
map <- readOGR('geo/wards/wards.shp', 'wards') | |
englandRegionNames <- levels(ward_reg_lkup$GOR10NM) | |
englandRegionNames <- englandRegionNames[englandRegionNames != ''] | |
for (region_name in englandRegionNames) { | |
wards_in_region <- ward_reg_lkup[ward_reg_lkup$GOR10NM==region_name, ]$WD11CD | |
create_files_for_region(region_name, wards_in_region) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment