Last active
April 17, 2024 22:45
-
-
Save rvanbruggen/9f6efa3f426ee6ce724f to your computer and use it in GitHub Desktop.
Loading and Querying GTFS data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//LOAD CSV script for GFTS data | |
create constraint on (a:Agency) assert a.id is unique; | |
create constraint on (r:Route) assert r.id is unique; | |
create constraint on (t:Trip) assert t.id is unique; | |
create index on :Trip(service_id); | |
create constraint on (s:Stop) assert s.id is unique; | |
create index on :Stoptime(stop_sequence); | |
create index on :Stop(name); | |
schema await | |
//add the agency | |
load csv with headers from | |
'file:///nmbs/agency.txt' as csv | |
create (a:Agency {id: toInt(csv.agency_id), name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone}); | |
// add the routes | |
load csv with headers from | |
'file:///nmbs/routes.txt' as csv | |
match (a:Agency {id: toInt(csv.agency_id)}) | |
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)}); | |
// add the trips | |
load csv with headers from | |
'file:///nmbs/trips.txt' as csv | |
match (r:Route {id: csv.route_id}) | |
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id}); | |
//add the stops | |
load csv with headers from | |
'file:///nmbs/stops.txt' as csv | |
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type}); | |
//connect parent/child relationships to stops | |
load csv with headers from | |
'file:///nmbs/stops.txt' as csv | |
with csv | |
where not (csv.parent_station is null) | |
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id}) | |
create (ps)<-[:PART_OF]-(s); | |
//add the stoptimes | |
load csv with headers from | |
'file:///nmbs/stop_times.txt' as csv | |
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id}) | |
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s); | |
//create integers out of the stoptimes (to allow for calculations/ordering) | |
match (s:Stoptime) | |
set s.arrival_time_int=toInt(replace(s.arrival_time,":",""))/100 | |
set s.departure_time_int=toInt(replace(s.departure_time,":",""))/100 | |
//connect the stoptime sequences | |
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip), | |
(s2:Stoptime)-[:PART_OF_TRIP]->(t) | |
where s2.stop_sequence=s1.stop_sequence+1 | |
create (s1)-[:PRECEDES]->(s2); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Query a GFTS dataset | |
//Find ANTWERP | |
match (s:Stop) | |
where upper(s.name) starts with "ANTW" | |
return s | |
//Find Turnhout part 1 | |
match (s:Stop) | |
where s.name starts with "Turn" | |
return s | |
//Find Turnhout part 2 | |
match (s:Stop) | |
where upper(s.name) starts with "TURN" | |
return s | |
//Find Antwerp and Turnhout | |
// match (s:Stop) | |
// where s.name starts with "Antw" | |
// or s.name starts with "Turn" | |
// return s; | |
match (ant:Stop), (tu:Stop) | |
where ant.name starts with "Antw" | |
AND tu.name starts with "Turn" | |
return distinct tu,ant; | |
// match (ant:Stop) | |
// where ant.name starts with "Antw" | |
// return ant.name as name | |
// union | |
// match (tu:Stop) | |
// where tu.name starts with "Turn" | |
// return tu.name as name; | |
//find the links between the top-level stops | |
match (t:Stop)<-[:PART_OF]-(:Stop), | |
(a:Stop)<-[:PART_OF]-(:Stop) | |
where t.name starts with "Turn" | |
AND a.name="Antwerpen-Centraal" | |
with t,a | |
match p = allshortestpaths((t)-[*]-(a)) | |
return p | |
limit 10; | |
match (t:Stop),(a:Stop) | |
where t.name starts with "Turn" | |
AND a.name="Antwerpen-Centraal" | |
with t,a | |
match p = allshortestpaths((t)-[*]-(a)) | |
return p | |
limit 10 | |
//range queries with and without indexes | |
match (st:Stoptime) | |
where st.departure_time < "07:45:00" | |
return st.departure_time; | |
//find a DIRECT route with range conditions | |
match (tu:Stop {name: "Turnhout"})--(tu_st:Stoptime) | |
where tu_st.departure_time > "07:00:00" | |
AND tu_st.departure_time < "09:00:00" | |
with tu, tu_st | |
match (ant:Stop {name:"Antwerpen-Centraal"})--(ant_st:Stoptime) | |
where ant_st.arrival_time < "09:00:00" | |
AND ant_st.arrival_time > "07:00:00" | |
and ant_st.arrival_time > tu_st.departure_time | |
with ant,ant_st,tu, tu_st | |
match p = allshortestpaths((tu_st)-[*]->(ant_st)) | |
with nodes(p) as n | |
unwind n as nodes | |
match (nodes)-[r]-() | |
return nodes,r | |
//Indirect routes | |
//find the route and the stops for the indirect route | |
match (t:Stop),(a:Stop) | |
where t.name = "Turnhout" | |
AND a.name="Arlon" | |
with t,a | |
match p = allshortestpaths((t)-[*]-(a)) | |
where NONE (x in relationships(p) where type(x)="OPERATES") | |
return p | |
limit 10 | |
//plan a specific indirect route | |
MATCH (tu:Stop {name:"Turnhout"})--(st_tu:Stoptime), | |
(ar:Stop {name:"Arlon"})--(st_ar:Stoptime), | |
p1=((st_tu)-[:PRECEDES*]->(st_midway_arr:Stoptime)), | |
(st_midway_arr)--(midway:Stop), | |
(midway)--(st_midway_dep:Stoptime), | |
p2=((st_midway_dep)-[:PRECEDES*]->(st_ar)) | |
WHERE | |
st_tu.departure_time > "08:00:00" | |
AND st_tu.departure_time < "11:00:00" | |
AND st_midway_arr.arrival_time > st_tu.departure_time | |
AND st_midway_dep.departure_time > st_midway_arr.arrival_time | |
AND st_ar.arrival_time > st_midway_dep.departure_time | |
RETURN | |
tu,st_tu,ar,st_ar,p1,p2,midway | |
order by (st_ar.arrival_time_int-st_tu.departure_time_int) ASC | |
limit 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks Bruggen,
I read in Bruggen blog about: Loading General Transport Feed Spec (GTFS) files into Neo4j.
It was so helpful.
I'm looking on how can i write a query which get a spanning tree for all the stop's which in a given range of departure_time and arrival_time, without filtering on a specific stop.
perhaps next step i want to run a query which give answer about transportation acceptability between two given areas.
Do you have any suggestion how to do it?