Last active
February 14, 2019 02:14
-
-
Save ian-weisser/9994498 to your computer and use it in GitHub Desktop.
Parse a GTFS file for stops near a location
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
""" | |
Locate stop_id candidates from a GTFS stops file.

This is a helper application. You use it to discover a list
of possible stop_ids for your *real* application. It's handy
during debugging, or during setup of an application.

Example: You know an intersection name or a lat/lon pair or
an existing stop_id, and you want the list of stops nearby.

Requires python3, and a zipped GTFS file to read stop data from.
Output data includes the raw GTFS stops.txt line.

USAGE: (see -h for the full list)
    $ ./stop_finder /path/to/GTFS/zipfile --names "oklahoma howell"
    $ ./stop_finder /path/to/GTFS/zipfile --latlon 42.9883466 -87.9041176
    $ ./stop_finder /path/to/GTFS/zipfile --stop 5152

EXAMPLE:
Let's look for the stops at the corner of Oklahoma & Howell:

    $ ./stop_finder /path/to/my_file.gtfs --names "oklahoma howell"
    Looking near LAT=42.988313549999994, LON=-87.90440945
    Dist Stop
    ------- --------------------------------------------------
    13 709,709,HOWELL & OKLAHOMA,, 42.9882051, -87.9043319,,,1
    19 5068,5068,OKLAHOMA & HOWELL,, 42.9883466, -87.9041176,,,1
    23 5152,5152,OKLAHOMA & HOWELL,, 42.9881561, -87.9046550,,,1
    27 658,658,HOWELL & OKLAHOMA,, 42.9885464, -87.9045333,,,1
    149 5069,5069,OKLAHOMA & AUSTIN,, 42.9883253, -87.9066868,,,0
    153 5153,5153,OKLAHOMA & QUINCY,, 42.9881773, -87.9020859,,,0
    181 5151,5151,OKLAHOMA & AUSTIN,, 42.9881666, -87.9071558,,,0
    185 5067,5067,OKLAHOMA & QUINCY,, 42.9883465, -87.9015866,,,0
    323 5154,5154,OKLAHOMA & PINE,, 42.9881953, -87.8994944,,,0
    350 5066,5066,OKLAHOMA & PINE,, 42.9883965, -87.8990949,,,0

The first four results are within a few meters of each other, and
all at the same intersection. The next closest stop is 150 meters
away. The first four stops are the stops we want.

Results are always in meters, and include *all* nearby stops in
order of distance (nearest first).
""" | |
import argparse | |
import math | |
import sys | |
import zipfile | |
def parse_command_line(argv=None):
    """
    Parse the command line options:
    - Location of the GTFS zipfile (positional, required)
    - Type of search, at most one of: street names, lat/lon, or near
      an already-known stop_id

    argv: optional list of argument strings. When None (the default),
          argparse reads the arguments from sys.argv[1:].

    Returns the argparse.Namespace with the attributes GTFS_zipfile,
    names, latlon and stop. Search options that were not given are None;
    names and stop are one-element lists of strings, and latlon is a
    two-element list of floats.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('GTFS_zipfile', metavar='zipfile',
                        help='path to GTFS zipfile')

    # Only one kind of search makes sense per invocation.
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-n', '--names', '--strings', nargs=1,
                       metavar='"STRING1 STRING2"',
                       help='string search (use quotes for multiple strings)')
    # type=float lets argparse reject non-numeric coordinates with a
    # usage error instead of a traceback later in the program.
    group.add_argument('-l', '--latlon', '--coordinates',
                       nargs=2, type=float,
                       metavar=('LAT.AAAA', 'LON.BBBB'),
                       help='lat/lon search')
    group.add_argument('-x', '--stop', '--known-stop',
                       nargs=1, metavar='STOP_ID',
                       help='known stop_id')
    return parser.parse_args(argv)
def read_stops_zipfile(path):
    """
    Read the stops.txt file from the zipfile.

    path: filesystem path of the zipped GTFS feed.

    Returns the content of stops.txt as a list of strings, one per line,
    with the line terminators removed. The header line is included, and
    a file that ends with a newline yields a final empty string.

    Exits the program with an error message when path is not a zipfile
    or when the archive has no stops.txt member.
    """
    if not zipfile.is_zipfile(path):
        sys.exit("error: Not a valid GTFS zipfile")

    # Context managers close both handles even when reading fails.
    with zipfile.ZipFile(path, mode='r') as gtfs:
        try:
            with gtfs.open('stops.txt', mode='r') as stops_file:
                raw_bytes = stops_file.read()
        except KeyError:
            # ZipFile.open() raises KeyError for a missing member.
            sys.exit("error: No stops.txt in GTFS zipfile")

    # 'utf-8-sig' drops a leading byte-order mark if the feed has one.
    stops_string = raw_bytes.decode('utf-8-sig')

    # Accept CRLF, bare CR and bare LF line endings alike; splitting on
    # '\r\n' only would return an LF-terminated file as a single line.
    normalized = stops_string.replace('\r\n', '\n').replace('\r', '\n')
    return normalized.split('\n')
def string_matching(all_stops, search_strings):
    """
    Return the list of stops that match the *most* strings in the list.

    all_stops:      list of raw stops.txt lines.
    search_strings: one-element list holding the whitespace-separated
                    search terms (the shape argparse produces for
                    --names).

    Matching is a case-insensitive substring test against the third
    comma-separated field of each line. Lines with fewer than seven
    fields, and the header line, are ignored. Stops that match no term
    at all are never returned.

    Example: 'howell lincoln kk'. In Milwaukee, no stops will match
    all three terms. Six stops will match two of the three terms,
    and this function will return a list of those six stops.
    """
    # split() without an argument discards the empty strings produced by
    # repeated or leading/trailing spaces; an empty term is a substring
    # of every name and would make every stop look like a match.
    terms = [term.upper() for term in search_strings[0].split()]

    matches = []
    highest_match = 0
    for stop_line in all_stops:
        # The header row is not a stop; returning it would feed
        # non-numeric coordinates to the caller.
        if 'stop_id' in stop_line:
            continue
        fields = stop_line.split(',')
        if len(fields) < 7:
            continue
        stop_name = fields[2].upper()
        counter = sum(1 for term in terms if term in stop_name)
        if counter == 0 or counter < highest_match:
            continue
        if counter > highest_match:
            # A better score invalidates everything collected so far.
            highest_match = counter
            matches = []
        matches.append(stop_line)
    return matches
def xy_distances(latitude):
    """
    Return the length in meters of one degree of longitude (x) and of
    one degree of latitude (y) at the given latitude.

    latitude: latitude in decimal degrees, as a number or numeric string.

    Returns the tuple (x_deg, y_deg) of integers. Callers divide a
    distance in meters by these values to get the matching fraction of
    a degree.

    Example: At 45 deg, 1 degree of longitude (along the 45-line
    of latitude) is roughly 78,800m.
    So 500m = 500/78800 = 0.0063 deg, and
    a 500m bracket would be longitude +/- 0.0063.

    The y-distance (one degree of latitude) varies by only about
    1000m/deg (1%) between equator and pole, so a constant is used.
    The x-distance (one degree of longitude) depends on the
    latitude. It's 111320m at the equator, and 0 at the poles.

    See http://en.wikipedia.org/wiki/
    Length_of_a_degree_of_longitude#Length_of_a_degree_of_longitude
    """
    y_deg = 111132
    # math.cos() expects radians; the latitude arrives in degrees.
    cosine = math.cos(math.radians(float(latitude)))
    x_deg = abs(int(math.pi * 6378137 * cosine / 180))
    # Never report 0 (at the poles), because callers divide by x_deg.
    return max(1, x_deg), y_deg
def center(list_of_stops):
    """
    Calculate the approximate center of a list of points.

    list_of_stops: non-empty list of raw stops.txt lines; the latitude
                   is read from the fifth comma-separated field and the
                   longitude from the sixth.

    Returns the tuple (average latitude, average longitude) as floats.
    Raises ValueError when list_of_stops is empty.
    """
    if not list_of_stops:
        # Fail with a clear message instead of a ZeroDivisionError below.
        raise ValueError("cannot calculate the center of an empty list of stops")

    latitude = 0.0
    longitude = 0.0
    for stop in list_of_stops:
        fields = stop.split(',')
        latitude += float(fields[4])
        longitude += float(fields[5])

    count = len(list_of_stops)
    return latitude / count, longitude / count
def dist(ax, ay, bx, by, x_deg, y_deg):
    """
    Approximate distance in meters between a and b.

    ax, ay / bx, by: x and y coordinates of the two points, in degrees.
    x_deg, y_deg:    meters per degree along the x and y axes.

    Returns the straight-line distance, truncated to an integer.
    """
    # Scale each axis separation from degrees to meters.
    x_dist = abs(ax - bx) * x_deg
    y_dist = abs(ay - by) * y_deg
    return int(math.sqrt(x_dist**2 + y_dist**2))
def nearby_stops(list_of_stops, center_lat, center_lon):
    """
    Print the stops near a point, closest first.

    list_of_stops: list of raw stops.txt lines (header and empty lines
                   are skipped, as are lines without numeric
                   coordinates in the fifth and sixth fields).
    center_lat:    latitude of the search center, in decimal degrees.
    center_lon:    longitude of the search center, in decimal degrees.

    Stops are kept when they fall inside a box reaching 500 meters from
    the center along each axis. Each kept stop is printed as its
    distance in meters, a tab, and the raw stops.txt line. At most 20
    stops are printed. Returns None.
    """
    maximum_distance = 500.0
    list_max_length = 20

    # Take the meters-per-degree scale at the search center itself; it
    # is always available, unlike an arbitrary line of the stops list.
    x_deg, y_deg = xy_distances(center_lat)
    x_dist = maximum_distance / max(x_deg, 1)   # guard the division
    y_dist = maximum_distance / y_deg

    candidates = []
    for stop in list_of_stops:
        if stop == '' or 'stop_id' in stop:
            continue
        fields = stop.split(',')
        try:
            latitude = float(fields[4])
            longitude = float(fields[5])
        except (IndexError, ValueError):
            # Malformed line: not enough fields or no usable coordinates.
            continue

        # Don't bother to calculate distances for stops that
        # are obviously outside the maximum_distance box
        inside_x = center_lon - x_dist <= longitude <= center_lon + x_dist
        inside_y = center_lat - y_dist <= latitude <= center_lat + y_dist
        if not (inside_x and inside_y):
            continue

        distance = dist(center_lon, center_lat,
                        longitude, latitude, x_deg, y_deg)
        candidates.append((distance, stop))

    # The sort is stable, so stops at equal distance keep file order.
    candidates.sort(key=lambda pair: pair[0])
    for distance, stop in candidates[:list_max_length]:
        print(distance, "\t", stop)
    return
def run():
    """
    Locate and arrange the stop_ids from a GTFS stops file.

    Reads the command line, loads stops.txt from the GTFS zipfile,
    works out the search center from the chosen search type (names,
    lat/lon, or a known stop_id), and prints the nearby stops.

    Exits with an error message when the search cannot produce a
    center: no name match, unknown stop_id, unusable coordinates, or
    no search option given.
    """
    arg = parse_command_line()
    all_stops = read_stops_zipfile(arg.GTFS_zipfile)
    latitude = None
    longitude = None

    if arg.names is not None:  # Names, like streets or 'airport'
        some_stops = string_matching(all_stops, arg.names)
        if not some_stops:
            # Without this, averaging zero stops would divide by zero.
            sys.exit("error: No stops matched {!r}".format(arg.names[0]))
        if len(some_stops) > 6:
            print("Try narrowing your results with another term:")
            for stop in some_stops:
                print(" {}".format(stop))
            sys.exit()
        latitude, longitude = center(some_stops)

    elif arg.latlon is not None:  # lat/lon coordinates
        try:
            latitude = float(arg.latlon[0])
            longitude = float(arg.latlon[1])
        except ValueError:
            sys.exit("error: LAT and LON must be decimal numbers")

    elif arg.stop is not None:  # known stop
        for stop in all_stops:
            fields = stop.split(',')
            if len(fields) < 7:
                continue
            if fields[0] == arg.stop[0]:
                latitude = float(fields[4])
                longitude = float(fields[5])
                break
        if latitude is None:
            sys.exit("error: stop_id {} not found in stops.txt".format(
                arg.stop[0]))

    else:
        sys.exit("error: No search given; use --names, --latlon or --stop"
                 " (see -h)")

    print("Looking near LAT={}, LON={}".format(latitude, longitude))
    print("Dist Stop")
    print("------- --------------------------------------------------")
    nearby_stops(all_stops, latitude, longitude)


if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment