Last active
February 20, 2017 03:09
-
-
Save ethen8181/b670b0218defce3588e5cd2292e87d68 to your computer and use it in GitHub Desktop.
geospatial
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python3.5 | |
import os | |
import heapq | |
import joblib | |
import numpy as np | |
import pandas as pd | |
from shapely.geometry import LineString, Point | |
from sklearn.neighbors import NearestNeighbors | |
from collections import defaultdict, namedtuple | |
def read_data(folder): | |
"""read in probe and link data given the folder name""" | |
probe_path = os.path.join(folder, 'Partition6467ProbePoints.csv') | |
probe = pd.read_csv(probe_path, header = None) | |
probe.columns = ['sampleID', 'dateTime', 'sourceCode', 'latitude', | |
'longitude', 'altitude', 'speed', 'heading'] | |
link_path = os.path.join(folder, 'Partition6467LinkData.csv') | |
link = pd.read_csv(link_path, header = None) | |
link.columns = ['linkPVID', 'refNodeID', 'nrefNodeID', 'length', | |
'functionalClass', 'directionOfTravel', 'speedCategory', | |
'fromRefSpeedLimit', 'toRefSpeedLimit', 'fromRefNumLanes', | |
'toRefNumLanes', 'multiDigitized', 'urban', 'timeZone', | |
'shapeInfo', 'curvatureInfo', 'slopeInfo'] | |
def parse_shape(shape_info): | |
"""extract the longitude and latitude of the shape""" | |
nodes = shape_info.split('|') | |
result = np.zeros(( len(nodes), 2), dtype = np.float) | |
for idx, node in enumerate(nodes): | |
position = node.split('/')[:2] | |
result[idx] = position | |
return result | |
link['nodePositions'] = link['shapeInfo'].apply(lambda row: parse_shape(row)) | |
return probe, link | |
def train_knn(link, probe_positions, k): | |
""" | |
obtain a probe points nearest nodes and | |
the link that those nodes belong to | |
Parameters | |
---------- | |
link : DataFrame | |
link data | |
probe_positions : 2d numpy array | |
probe's latitude and longitude | |
k : int | |
K-nearest neighbors' k | |
Return | |
------ | |
neighbors : list | |
each element of the list is a dictionary, the dictionary's | |
key is the probe's nearest neighbor's node's position and the | |
value is the link id that the node belongs to | |
""" | |
# create the reversed dictionary, where the | |
# latitude and longitude of each node is the | |
# key and the link id is the value | |
node_info = {} | |
link_ids = link['linkPVID'] | |
nodes = link['nodePositions'] | |
for node, link_id in zip(nodes, link_ids): | |
for n in node: | |
node_info[tuple(n)] = link_id | |
# prepare data and train the KNN model, to get the | |
# k nearest nodes for each probe point | |
node_positions = np.vstack(link['nodePositions'].values) | |
model = NearestNeighbors(n_neighbors = k, metric = 'euclidean') | |
model.fit(node_positions) | |
_, indices = model.kneighbors(probe_positions) | |
closest_nodes = node_positions[indices] | |
# for each probe point's nearest node, obtain the link | |
# that each node belongs to, the information is stored | |
# in a dictionary where the key is the node position and | |
# the value is the link that it corresponds to | |
neighbors = [] | |
for i in range(closest_nodes.shape[0]): | |
neighbor = {} | |
for j in closest_nodes[i]: | |
neighbor[tuple(j)] = node_info[tuple(j)] | |
neighbors.append(neighbor) | |
return neighbors | |
def get_closest_link(query, neighbor): | |
""" | |
given a query probe point and its | |
nearest neighbors obtain the nearest link | |
Parameter | |
--------- | |
query : 1d numpy array | |
the probe's longitude and latitude | |
neighbor : dict | |
each key is the longitude and latitude of the nearest | |
neighbors and the value is the corresponding link id | |
Returns | |
------- | |
closest_link : int | |
the query probe's closest link | |
""" | |
nodes = defaultdict(list) | |
dist_pair = namedtuple('dist_pair', ['dist', 'link', 'point']) | |
for position, link_id in neighbor.items(): | |
nodes[link_id].append(position) | |
dist_infos = [] | |
for link, node in nodes.items(): | |
if len(node) > 1: | |
line = LineString(node) # ?can we pre-computed this | |
dist = line.project(Point(query)) | |
point = line.interpolate(dist) | |
point = point.x, point.y | |
else: | |
# if the only 1 node from the link is in the | |
# nearest neighborhood, then simply compute | |
# the euclidean distance from that node to the link | |
diff = query - np.array(node[0]) | |
dist = np.sqrt( np.power(diff, 2).sum() ) | |
# ?change this to get another node from this link, and compute the projected distance | |
point = node[0] | |
dist_info = dist_pair(dist, link, point) | |
dist_infos.append(dist_info) | |
closest_info = heapq.nsmallest(1, dist_infos)[0] | |
closest_link = closest_info.link | |
closest_point = closest_info.point | |
return closest_link, closest_point | |
def create_output(probe_positions, neighbors): | |
""" | |
create the probe's projected distance and | |
the link that it belongs to | |
""" | |
link_ids = [] | |
projected_points = [] | |
for i, pos in enumerate(probe_positions): | |
link_id, projected_point = get_closest_link(pos, neighbors[i]) | |
link_ids.append(link_id) | |
projected_points.append(projected_point) | |
df_link_id = pd.DataFrame(link_ids, columns = ['link_ids']) | |
df_projected_points = pd.DataFrame(projected_points, | |
columns = ['projected_x', 'projected_y']) | |
output = pd.concat([df_link_id, df_projected_points], axis = 1) | |
output.to_csv('output.csv', index = False) | |
return output | |
def haversine_distance(probe, link): | |
""" | |
distance metric for measuring two points on the earth, | |
not used due to its computational inefficiency | |
""" | |
probe = np.deg2rad(probe) | |
link = np.deg2rad(link) | |
hav1 = 1 - np.cos(probe[0] - link[0]) / 2 | |
hav2 = 1 - np.cos(probe[1] - link[1]) / 2 | |
hav = hav1 + np.cos(probe[0]) * np.cos(probe[1]) * hav2 | |
return 2 * np.arcsin(np.sqrt(hav)) | |
if __name__ == '__main__': | |
# specify k nearest neighbor's K and the file path and | |
# the script will output a .csv file under the current | |
# directory, the file contains the probe's projected | |
# latitude, longitude and the link id that it belongs to | |
k = 10 | |
folder = 'probe_data_map_matching' | |
probe, link = read_data(folder) | |
probe_positions = probe[['latitude', 'longitude']].values | |
neighbors = train_knn(link, probe_positions, k) | |
output = create_output(probe_positions, neighbors) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment