Skip to content

Instantly share code, notes, and snippets.

@avioli
Forked from rvanbruggen/loadgtfs.cql
Created July 3, 2017 03:26
Show Gist options
  • Save avioli/de33895ae045258e7f3d714b9dfe3fa8 to your computer and use it in GitHub Desktop.
Save avioli/de33895ae045258e7f3d714b9dfe3fa8 to your computer and use it in GitHub Desktop.
Loading and Querying GTFS data
//LOAD CSV script for GFTS data
create constraint on (a:Agency) assert a.id is unique;
create constraint on (r:Route) assert r.id is unique;
create constraint on (t:Trip) assert t.id is unique;
create index on :Trip(service_id);
create constraint on (s:Stop) assert s.id is unique;
create index on :Stoptime(stop_sequence);
create index on :Stop(name);
schema await
//add the agency
load csv with headers from
'file:///nmbs/agency.txt' as csv
create (a:Agency {id: toInt(csv.agency_id), name: csv.agency_name, url: csv.agency_url, timezone: csv.agency_timezone});
// add the routes
load csv with headers from
'file:///nmbs/routes.txt' as csv
match (a:Agency {id: toInt(csv.agency_id)})
create (a)-[:OPERATES]->(r:Route {id: csv.route_id, short_name: csv.route_short_name, long_name: csv.route_long_name, type: toInt(csv.route_type)});
// add the trips
load csv with headers from
'file:///nmbs/trips.txt' as csv
match (r:Route {id: csv.route_id})
merge (r)<-[:USES]-(t:Trip {id: csv.trip_id, service_id: csv.service_id});
//add the stops
load csv with headers from
'file:///nmbs/stops.txt' as csv
create (s:Stop {id: csv.stop_id, name: csv.stop_name, lat: toFloat(csv.stop_lat), lon: toFloat(csv.stop_lon), platform_code: csv.platform_code, parent_station: csv.parent_station, location_type: csv.location_type});
//connect parent/child relationships to stops
load csv with headers from
'file:///nmbs/stops.txt' as csv
with csv
where not (csv.parent_station is null)
match (ps:Stop {id: csv.parent_station}), (s:Stop {id: csv.stop_id})
create (ps)<-[:PART_OF]-(s);
//add the stoptimes
load csv with headers from
'file:///nmbs/stop_times.txt' as csv
match (t:Trip {id: csv.trip_id}), (s:Stop {id: csv.stop_id})
create (t)<-[:PART_OF_TRIP]-(st:Stoptime {arrival_time: csv.arrival_time, departure_time: csv.departure_time, stop_sequence: toInt(csv.stop_sequence)})-[:LOCATED_AT]->(s);
//create integers out of the stoptimes (to allow for calculations/ordering)
match (s:Stoptime)
set s.arrival_time_int=toInt(replace(s.arrival_time,":",""))/100
set s.departure_time_int=toInt(replace(s.departure_time,":",""))/100
//connect the stoptime sequences
match (s1:Stoptime)-[:PART_OF_TRIP]->(t:Trip),
(s2:Stoptime)-[:PART_OF_TRIP]->(t)
where s2.stop_sequence=s1.stop_sequence+1
create (s1)-[:PRECEDES]->(s2);
//Query a GFTS dataset
//Find ANTWERP
match (s:Stop)
where upper(s.name) starts with "ANTW"
return s
//Find Turnhout part 1
match (s:Stop)
where s.name starts with "Turn"
return s
//Find Turnhout part 2
match (s:Stop)
where upper(s.name) starts with "TURN"
return s
//Find Antwerp and Turnhout
// match (s:Stop)
// where s.name starts with "Antw"
// or s.name starts with "Turn"
// return s;
match (ant:Stop), (tu:Stop)
where ant.name starts with "Antw"
AND tu.name starts with "Turn"
return distinct tu,ant;
// match (ant:Stop)
// where ant.name starts with "Antw"
// return ant.name as name
// union
// match (tu:Stop)
// where tu.name starts with "Turn"
// return tu.name as name;
//find the links between the top-level stops
match (t:Stop)<-[:PART_OF]-(:Stop),
(a:Stop)<-[:PART_OF]-(:Stop)
where t.name starts with "Turn"
AND a.name="Antwerpen-Centraal"
with t,a
match p = allshortestpaths((t)-[*]-(a))
return p
limit 10;
match (t:Stop),(a:Stop)
where t.name starts with "Turn"
AND a.name="Antwerpen-Centraal"
with t,a
match p = allshortestpaths((t)-[*]-(a))
return p
limit 10
//range queries with and without indexes
match (st:Stoptime)
where st.departure_time < "07:45:00"
return st.departure_time;
//find a DIRECT route with range conditions
match (tu:Stop {name: "Turnhout"})--(tu_st:Stoptime)
where tu_st.departure_time > "07:00:00"
AND tu_st.departure_time < "09:00:00"
with tu, tu_st
match (ant:Stop {name:"Antwerpen-Centraal"})--(ant_st:Stoptime)
where ant_st.arrival_time < "09:00:00"
AND ant_st.arrival_time > "07:00:00"
and ant_st.arrival_time > tu_st.departure_time
with ant,ant_st,tu, tu_st
match p = allshortestpaths((tu_st)-[*]->(ant_st))
with nodes(p) as n
unwind n as nodes
match (nodes)-[r]-()
return nodes,r
//Indirect routes
//find the route and the stops for the indirect route
match (t:Stop),(a:Stop)
where t.name = "Turnhout"
AND a.name="Arlon"
with t,a
match p = allshortestpaths((t)-[*]-(a))
where NONE (x in relationships(p) where type(x)="OPERATES")
return p
limit 10
//plan a specific indirect route
MATCH (tu:Stop {name:"Turnhout"})--(st_tu:Stoptime),
(ar:Stop {name:"Arlon"})--(st_ar:Stoptime),
p1=((st_tu)-[:PRECEDES*]->(st_midway_arr:Stoptime)),
(st_midway_arr)--(midway:Stop),
(midway)--(st_midway_dep:Stoptime),
p2=((st_midway_dep)-[:PRECEDES*]->(st_ar))
WHERE
st_tu.departure_time > "08:00:00"
AND st_tu.departure_time < "11:00:00"
AND st_midway_arr.arrival_time > st_tu.departure_time
AND st_midway_dep.departure_time > st_midway_arr.arrival_time
AND st_ar.arrival_time > st_midway_dep.departure_time
RETURN
tu,st_tu,ar,st_ar,p1,p2,midway
order by (st_ar.arrival_time_int-st_tu.departure_time_int) ASC
limit 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment