Skip to content

Instantly share code, notes, and snippets.

@rvanbruggen
Last active April 17, 2024 22:45
Show Gist options
  • Save rvanbruggen/9f6efa3f426ee6ce724f to your computer and use it in GitHub Desktop.
Save rvanbruggen/9f6efa3f426ee6ce724f to your computer and use it in GitHub Desktop.
Loading and Querying GTFS data
//LOAD CSV script for GFTS data
create constraint on (a:Agency) assert a.id is unique;
create constraint on (r:Route) assert r.id is unique;
create constraint on (t:Trip) assert t.id is unique;
create index on :Trip(service_id);
create constraint on (s:Stop) assert s.id is unique;
create index on :Stoptime(stop_sequence);
create index on :Stop(name);
schema await
//add the agency
load csv with headers from
'file:///nmbs/agency.txt' as csv
create (a:Agency {id: toInt(csv.agency_id), name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});
// add the routes
load csv with headers from
'file:///nmbs/routes.txt' as csv
match (a:Agency {id: toInt(csv.agency_id)})
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)});
// add the trips
load csv with headers from
'file:///nmbs/trips.txt' as csv
match (r:Route {id: csv.route_id})
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id});
//add the stops
load csv with headers from
'file:///nmbs/stops.txt' as csv
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type});
//connect parent/child relationships to stops
load csv with headers from
'file:///nmbs/stops.txt' as csv
with csv
where not (csv.parent_station is null)
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
create (ps)<-[:PART_OF]-(s);
//add the stoptimes
load csv with headers from
'file:///nmbs/stop_times.txt' as csv
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s);
//create integers out of the stoptimes (to allow for calculations/ordering)
match (s:Stoptime)
set s.arrival_time_int=toInt(replace(s.arrival_time,":",""))/100
set s.departure_time_int=toInt(replace(s.departure_time,":",""))/100
//connect the stoptime sequences
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
(s2:Stoptime)-[:PART_OF_TRIP]->(t)
where s2.stop_sequence=s1.stop_sequence+1
create (s1)-[:PRECEDES]->(s2);
//Query a GFTS dataset
//Find ANTWERP
match (s:Stop)
where upper(s.name) starts with "ANTW"
return s
//Find Turnhout part 1
match (s:Stop)
where s.name starts with "Turn"
return s
//Find Turnhout part 2
match (s:Stop)
where upper(s.name) starts with "TURN"
return s
//Find Antwerp and Turnhout
// match (s:Stop)
// where s.name starts with "Antw"
// or s.name starts with "Turn"
// return s;
match (ant:Stop), (tu:Stop)
where ant.name starts with "Antw"
AND tu.name starts with "Turn"
return distinct tu,ant;
// match (ant:Stop)
// where ant.name starts with "Antw"
// return ant.name as name
// union
// match (tu:Stop)
// where tu.name starts with "Turn"
// return tu.name as name;
//find the links between the top-level stops
match (t:Stop)<-[:PART_OF]-(:Stop),
(a:Stop)<-[:PART_OF]-(:Stop)
where t.name starts with "Turn"
AND a.name="Antwerpen-Centraal"
with t,a
match p = allshortestpaths((t)-[*]-(a))
return p
limit 10;
match (t:Stop),(a:Stop)
where t.name starts with "Turn"
AND a.name="Antwerpen-Centraal"
with t,a
match p = allshortestpaths((t)-[*]-(a))
return p
limit 10
//range queries with and without indexes
match (st:Stoptime)
where st.departure_time < "07:45:00"
return st.departure_time;
//find a DIRECT route with range conditions
match (tu:Stop {name: "Turnhout"})--(tu_st:Stoptime)
where tu_st.departure_time > "07:00:00"
AND tu_st.departure_time < "09:00:00"
with tu, tu_st
match (ant:Stop {name:"Antwerpen-Centraal"})--(ant_st:Stoptime)
where ant_st.arrival_time < "09:00:00"
AND ant_st.arrival_time > "07:00:00"
and ant_st.arrival_time > tu_st.departure_time
with ant,ant_st,tu, tu_st
match p = allshortestpaths((tu_st)-[*]->(ant_st))
with nodes(p) as n
unwind n as nodes
match (nodes)-[r]-()
return nodes,r
//Indirect routes
//find the route and the stops for the indirect route
match (t:Stop),(a:Stop)
where t.name = "Turnhout"
AND a.name="Arlon"
with t,a
match p = allshortestpaths((t)-[*]-(a))
where NONE (x in relationships(p) where type(x)="OPERATES")
return p
limit 10
//plan a specific indirect route
MATCH (tu:Stop {name:"Turnhout"})--(st_tu:Stoptime),
(ar:Stop {name:"Arlon"})--(st_ar:Stoptime),
p1=((st_tu)-[:PRECEDES*]->(st_midway_arr:Stoptime)),
(st_midway_arr)--(midway:Stop),
(midway)--(st_midway_dep:Stoptime),
p2=((st_midway_dep)-[:PRECEDES*]->(st_ar))
WHERE
st_tu.departure_time > "08:00:00"
AND st_tu.departure_time < "11:00:00"
AND st_midway_arr.arrival_time > st_tu.departure_time
AND st_midway_dep.departure_time > st_midway_arr.arrival_time
AND st_ar.arrival_time > st_midway_dep.departure_time
RETURN
tu,st_tu,ar,st_ar,p1,p2,midway
order by (st_ar.arrival_time_int-st_tu.departure_time_int) ASC
limit 1
@abed-el-jbara
Copy link

abed-el-jbara commented Feb 9, 2017

Thanks Bruggen,
I read in Bruggen blog about: Loading General Transport Feed Spec (GTFS) files into Neo4j.
It was so helpful.
I'm looking on how can i write a query which get a spanning tree for all the stop's which in a given range of departure_time and arrival_time, without filtering on a specific stop.
perhaps next step i want to run a query which give answer about transportation acceptability between two given areas.
Do you have any suggestion how to do it?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment