Skip to content

Instantly share code, notes, and snippets.

View giacecco's full-sized avatar

Gianfranco Cecconi giacecco

View GitHub Profile
@giacecco
giacecco / 1.R
Last active August 29, 2015 14:25
require(dplyr)
require(RPostgreSQL)
target_town <- "BERKHAMSTED"
# read from Land Registry's Price Paid (LRPP) all addresses whose street names are not NULL and
# PAON or SAON are numeric; if both are, take the PAON only
# the DISTINCT below is important, otherwise down this script the AONs of properties that have
# been sold many times will weight more than the others
lr_pp <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, aon FROM ((SELECT street, town, CAST(SUBSTRING(paon, '^([0-9]+)') AS NUMERIC) AS aon FROM lr_pp WHERE street IS NOT NULL AND paon ~ '^[0-9]+') UNION (SELECT street, town, CAST(SUBSTRING(saon, '^([0-9]+)') AS NUMERIC) AS aon FROM lr_pp WHERE street IS NOT NULL AND paon !~ '^[0-9]+' AND saon ~ '^[0-9]+')) AS a"))
@giacecco
giacecco / 2.R
Last active August 29, 2015 14:25
# calculated a weighted average of the max aon vs its frequency
temp <- max_aon_by_street %>% group_by(max_aon) %>% summarise(frequency = n())
representative_max_aons <- weighted.mean(temp$max_aon, temp$frequency)
# ... and display
representative_max_aons
# read Ordnance Survey's Open Names to get all streets belonging to a 'populated place'
os_on <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names_wog WHERE populated_place != '' AND local_type IN ('Section Of Named Road', 'Named Road', 'Numbered Road', 'Section Of Numbered Road')"))
# filter by target town
target_town_streets <- os_on %>% filter(toupper(populated_place) == toupper(target_town))
# total estimate of no. of streets in the target town
no_of_streets <- collect(target_town_streets %>% summarise(tot = n()))$tot
# display the total estimate of no. of addresses in the target town
# now, how many addresses and what % of the total can I get from LRPP over the estimated total?
lr_pp_with_house_names <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT street, town, paon, saon FROM lr_pp WHERE street IS NOT NULL"))
no_addresses_I_know_from_lrpp <- collect(lr_pp_with_house_names %>% filter(town == target_town) %>% summarise(no_of_addresses = n()))$no_of_addresses
perc_I_know_from_lrpp = no_addresses_I_know_from_lrpp / total_addresses_in_town
# and display what I am missing
total_addresses_in_town - no_addresses_I_know_from_lrpp
1 - perc_I_know_from_lrpp
@giacecco
giacecco / 1.R
Last active August 29, 2015 14:25
require(dplyr)
require(RPostgreSQL)
target_town <- "BERKHAMSTED"
# I fetch all postcodes and populated_places from OS' Open Names
os_on_postcodes <- tbl(src_postgres("olaf"), sql("SELECT name1, populated_place FROM os_open_names WHERE local_type = 'Postcode' AND populated_place != ''"))
# I filter by target town
# TODO: what to do about those parts of town like Northchurch to Berkhamsted that appear as
# display the average number of addresses expected in each postcode
total_addresses_in_town / length(target_town_postcodes)
# sanity check vs the entries in Land Registry's Price Paid
lr_pp <- tbl(src_postgres("olaf"), sql("SELECT DISTINCT town, pcd FROM lr_pp"))
target_town_lr_postcodes <- collect(lr_pp %>% filter(town == target_town))$pcd
# display how many we got
length(target_town_lr_postcodes)
@giacecco
giacecco / 4.R
Last active August 29, 2015 14:25
# are the LRPP postcode current? how many of them I can't find in OSON _at all_, whatever the
# populated place they're associated to?
sum(!target_town_lr_postcodes %in% collect(os_on_postcodes)$postcode)
sum(!target_town_lr_postcodes %in% collect(os_on_postcodes)$postcode) / length(target_town_lr_postcodes)
# how many Berkhamsted postcodes from LRPP are not Berkhamsted postcodes in OSON?
sum(!target_town_lr_postcodes %in% target_town_postcodes)
# what _populated places_ rather than Berkhamsted does OSON list for the same postcodes?
missing_berkhamsted_postcodes_in_oson <- setdiff(target_town_lr_postcodes, target_town_postcodes)
alien_postcodes <- collect(os_on_postcodes %>% filter(postcode %in% missing_berkhamsted_postcodes_in_oson))
unique(alien_postcodes$populated_place)