Skip to content

Instantly share code, notes, and snippets.

@davideanastasia
Created February 2, 2019 11:33
Show Gist options
  • Save davideanastasia/ac228b9fa6190972d89e85b1b375b94b to your computer and use it in GitHub Desktop.
Save davideanastasia/ac228b9fa6190972d89e85b1b375b94b to your computer and use it in GitHub Desktop.
def dataframe_from_file(filename):
    """Load one taxi trace file and return its unique (taxi_id, geohash) pairs.

    The file is read as header-less CSV with the columns ``taxi_id``, ``ts``,
    ``longitude`` and ``latitude`` (``ts`` is parsed as a date). Each row's
    coordinates are encoded with ``pgh.encode(..., precision=6)`` and the
    result is stored in a new ``geohash`` column.

    Args:
        filename: Path (or anything ``pd.read_csv`` accepts) of the file to
            load.

    Returns:
        A DataFrame with the columns ``taxi_id`` and ``geohash`` and duplicate
        rows removed, or ``None`` when the file has no rows or could not be
        read/encoded. In both ``None`` cases a message is printed.
    """
    try:
        taxi_data = pd.read_csv(
            filename,
            names=['taxi_id', 'ts', 'longitude', 'latitude'],
            parse_dates=['ts'],
        )
        if taxi_data.empty:
            print("skipping {} as empty".format(filename))
            return None

        # pgh.encode takes latitude first, while the file stores longitude
        # first, so the lambda swaps the argument order.
        encode_row = np.vectorize(
            lambda longitude, latitude: pgh.encode(latitude, longitude, precision=6)
        )
        taxi_data['geohash'] = encode_row(
            taxi_data['longitude'],
            taxi_data['latitude'],
        )
        return taxi_data[['taxi_id', 'geohash']].drop_duplicates()
    except Exception:
        # Best-effort loader: any read/parse/encode failure skips the file.
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate so a batch run can still be interrupted.
        print("cannot parse {}".format(filename))
        return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment