Skip to content

Instantly share code, notes, and snippets.

@northernjamie
Last active August 7, 2021 07:36
Show Gist options
  • Save northernjamie/3134fccbe6d352386c13558c2801c995 to your computer and use it in GitHub Desktop.
Save northernjamie/3134fccbe6d352386c13558c2801c995 to your computer and use it in GitHub Desktop.
Thie is an R script that gets real time bus data from DfT, and makes a series of maps and saves as images for making a gif.
library(tidyverse)
library(leaflet)
library(httr)
library(sp)
library(rgdal)
library(mapview)
library(xml2)
library(RColorBrewer)
#######
# This function calls the real time bus api a set number of times, with a set interval,
# and generates a series of images that can be stitched together to make a gif
# the gif-making is done in python
#######
#######
# TODO: Get the server up and running for a 24 hour job
# TODO: Differentiate between inbound/outbound on the map
# TODO: Put the API key into a secret file so I can get this into a public repo on GH
# TODO: Parameterise the duration and interval
#######
getBusData <- function() {
api_key <- "put your api key here" # API key from DfT
bounding_box <- "-2.74,53.3,-1.9,53.7" # Bounding box for GM
gm <- readOGR("inputs/gm.geojson") # Geojson for GM for further points clipping
gtfs_operators <- read_csv("inputs/itm_all_gtfs/agency.txt") # get the bus operators from DfT
i <- 1
for (i in 1:60) {
startTime <- Sys.time() # so that we can make the calls exactly 60 seconds apart (or whatever time interval we want)
doc <- read_xml(paste0("https://data.bus-data.dft.gov.uk/api/v1/datafeed?api_key=621e077f86267d348add17c93a0e31f839dc013a&boundingBox=",bounding_box))
# here, we are using the xml2 package to get specific nodes from the xml file
# the problem is that not all buses have all information. Most suggestions for parsing xml in R
# suggest generating lists, and binding them together, but where nodes don't exist for a particular journey
# record counts are not equal. This approach generates a df for each node that we want, consisting of
# the node and the unique id for the bus journey. If a node doesn't exist, then 'NA' is inserted.
# dplyr is used to join them all up at the end.
df_line_ref <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
line_refs <- xml_find_all(x, ".//d1:LineRef")
if (length(line_refs) == 0) {
data.frame(id,line=NA)
} else {
map_df(line_refs, function(y) {
line <- xml_text(y) %||% NA
data.frame(id,line)
})
}
})
df_long <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
longitude <- xml_find_all(x, ".//d1:Longitude")
if (length(longitude) == 0) {
data.frame(id,long=NA)
} else {
map_df(longitude, function(y) {
long <- xml_text(y) %||% NA
data.frame(id,long)
})
}
})
df_lat <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
latitude <- xml_find_all(x, ".//d1:Latitude")
if (length(latitude) == 0) {
data.frame(id,lat=NA)
} else {
map_df(latitude, function(y) {
lat <- xml_text(y) %||% NA
data.frame(id,lat)
})
}
})
df_bearing <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
bearings <- xml_find_all(x, ".//d1:Bearing")
if (length(bearings) == 0) {
data.frame(id,bearing=NA)
} else {
map_df(bearings, function(y) {
bearing <- xml_text(y) %||% NA
data.frame(id,bearing)
})
}
})
df_operator <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
operators <- xml_find_all(x, ".//d1:OperatorRef")
if (length(operators) == 0) {
data.frame(id,operator=NA)
} else {
map_df(operators, function(y) {
operator <- xml_text(y) %||% NA
data.frame(id,operator)
})
}
})
df_direction <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
directions <- xml_find_all(x, ".//d1:DirectionRef")
if (length(directions) == 0) {
data.frame(id,direction=NA)
} else {
map_df(directions, function(y) {
direction <- xml_text(y) %||% NA
data.frame(id,direction)
})
}
})
df_destination_ref <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
destination_refs <- xml_find_all(x, ".//d1:DestinationRef")
if (length(destination_refs) == 0) {
data.frame(id,destination_ref=NA)
} else {
map_df(destination_refs, function(y) {
destination_ref <- xml_text(y) %||% NA
data.frame(id,destination_ref)
})
}
})
df_destination_name <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
destination_names <- xml_find_all(x, ".//d1:DestinationName")
if (length(destination_names) == 0) {
data.frame(id,destination_name=NA)
} else {
map_df(destination_names, function(y) {
destination_name <- xml_text(y) %||% NA
data.frame(id,destination_name)
})
}
})
df_origin_ref <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
origin_refs <- xml_find_all(x, ".//d1:OriginRef")
if (length(origin_refs) == 0) {
data.frame(id,origin_ref=NA)
} else {
map_df(origin_refs, function(y) {
origin_ref <- xml_text(y) %||% NA
data.frame(id,origin_ref)
})
}
})
df_origin_name <- xml_find_all(doc, ".//d1:VehicleActivity") %>%
map_df(function(x) {
id <- xml_text(xml_find_first(x,".//d1:ItemIdentifier"))
origin_names <- xml_find_all(x, ".//d1:OriginName")
if (length(origin_names) == 0) {
data.frame(id,origin_name=NA)
} else {
map_df(origin_names, function(y) {
origin_name <- xml_text(y) %||% NA
data.frame(id,origin_name)
})
}
})
df_bus_info <- df_line_ref %>%
left_join(df_lat, by = "id") %>%
left_join(df_long, by = "id") %>%
left_join(df_bearing, by = "id") %>%
left_join(df_operator, by = "id") %>%
left_join(df_direction, by = "id") %>%
left_join(df_destination_ref, by = "id") %>%
left_join(df_destination_name, by = "id") %>%
left_join(df_origin_ref, by = "id") %>%
left_join(df_origin_name, by = "id") %>%
left_join(gtfs_operators, by=c("operator" = "agency_noc"))
df_bus_info$long <- as.numeric(df_bus_info$long)
df_bus_info$lat <- as.numeric(df_bus_info$lat)
df_bus_info$bearing <- as.numeric(df_bus_info$bearing)
# turn the data into a spatial points dataframe so that we can clip the dataset to greater manchester
coordinates(df_bus_info) <- c("long","lat")
as(df_bus_info,"SpatialPoints")
proj4string(df_bus_info) <- CRS("+proj=longlat +datum=WGS84")
proj4string(df_bus_info) <- proj4string(gm)
pointsinpoly <- over(df_bus_info,gm)
output <- (cbind(df_bus_info@data,df_bus_info@coords,pointsinpoly$NAME)) %>%
filter(!is.na(pointsinpoly$NAME))
# this bit only runs the first iteration of the loop and is used to define the top 12 operators on the first pass
# this ensures the legend and colours remain consistent throughout the animation, at the cost of not accounting
# for changes to the top12
# there are two top12s - the first gets the top12 companies, to allow us to replace all the others with 'Other'
# The second then recalculates the top 12 so that it includes the 'Others'
if(i==1) {
top12 <- output %>%
group_by(agency_name) %>%
filter(!is.na(agency_name)) %>%
summarise(count_agencies=n()) %>%
slice_max(count_agencies,n=12,with_ties=F)
output2 <- output %>%
mutate(agency_name=ifelse(agency_name %in% top12$agency_name,agency_name,"Other"))
top12 <- output %>%
group_by(agency_name) %>%
filter(!is.na(agency_name)) %>%
summarise(count_agencies=n()) %>%
slice_max(count_agencies,n=12,with_ties=F)
}
output2 <- output %>%
mutate(agency_name=ifelse(agency_name %in% top12$agency_name,agency_name,"Other"))
agency_pal <- colorFactor("Set3",levels=top12$agency_name)
bus_map <- leaflet(data = output) %>%
addProviderTiles("CartoDB.Positron") %>%
addCircleMarkers(~long,~lat,weight=0.5, color="#333333", radius = 5, fillColor = ~agency_pal(agency_name),fillOpacity=0.5,opacity=1) %>%
addPolygons(data = gm, weight=1, color="#444444",fillOpacity=0.05, opacity=1) %>%
addControl(startTime) %>%
addLegend(pal = agency_pal, values = top12$agency_name, opacity = 1)
# save the map as a png
mapshot(bus_map, file = paste0("outputs/map",i,".png"))
i = i+1
# this bit ensures the data is captured every 60 seconds, rather than 60seconds plus computing time
sleepTime <- startTime + 60 - Sys.time()
if (sleepTime > 0)
Sys.sleep(sleepTime)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment