Created
August 26, 2022 15:28
-
-
Save alexozwald/1978e1a25e3876dc79160373ddc48c6c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import numpy as np | |
import pandas as pd | |
import plotly.express as px | |
from sklearn import neighbors | |
# --- Input files (paths are relative, starting at ./maps) -------------------
DINERS_CSV = './maps/all_diners.csv'
PLUTO_GEOJSON = './maps/nycpluto_manhattan.geojson'
PLUTO_CSV = './maps/pluto_small.csv'

# Target dtype for each column read from DINERS_CSV.
DINERS_DTYPES = dict(
    area='category',
    venue_name='string',
    address='string',
    lat=np.float64,
    lon=np.float64,
    tip_count=np.uint32,
    rating=np.uint32,
    rating_signals=np.uint32,
    price=np.uint32,
)

# Target dtype for each column read from PLUTO_CSV.
PLUTO_DTYPES = dict(
    borough='category',
    ownername='string',
    lotarea=np.uint32,
    bldgarea=np.uint32,
    assesstot=np.uint64,
    bbl=np.uint64,
    latitude=np.float64,
    longitude=np.float64,
)

# Column name -> display label, handed to plotly as `labels=`.
var_renames = dict(
    bbl='BBL#',
    dist_diner='Distance (rad)',
    ownername="Owner's Name",
    bldgarea='Building Area',
    lotarea='Lot Area',
    borough='Borough',
    assesstot='Assess Total',
    dist_diner_rad='Distance (rad)',
    dist_diner_ft='Distance (ft)',
    ddist_min='Min Distance (ft)',
    ddist_max='Max Distance (ft)',
    color='color',
)
## load geojson map
# GeoJSON is UTF-8 by specification (RFC 7946), so decode it explicitly
# instead of relying on the platform's default text encoding.
with open(PLUTO_GEOJSON, encoding='utf-8') as f:
    mnhttn_map = json.load(f)
## DINER DATA
# load only the columns that are given a dtype in DINERS_DTYPES
diners = pd.read_csv(
    DINERS_CSV,
    usecols=[
        'area', 'venue_name', 'address', 'lat', 'lon',
        'tip_count', 'rating', 'rating_signals', 'price',
    ],
)
# errors='ignore': if the cast cannot be applied, the frame is kept as loaded
diners = diners.astype(DINERS_DTYPES, errors='ignore')

# Keep Manhattan rows that actually have coordinates (BallTree rejects NaN).
# The trailing .copy() gives an independent frame, so the column assignments
# below do not write into a slice of the original (SettingWithCopyWarning).
diners = (
    diners[diners['area'] == "Manhattan, NY"]
    .dropna(subset=['lat', 'lon'])
    .copy()
)

# convert coordinates to radians -> coordinate pairs
# (the haversine metric used below works on [lat, lon] in radians)
diners['lat'] = np.radians(diners['lat'])
diners['lon'] = np.radians(diners['lon'])
diners['coordinates'] = list(zip(diners['lat'], diners['lon']))

# generate balltree over the diner locations
diner_locs = diners[['lat', 'lon']].to_numpy()
tree = neighbors.BallTree(diner_locs, metric='haversine')
## PLUTO DATA
# load lots, drop every row with any missing value, then apply the dtypes
df = pd.read_csv(PLUTO_CSV)
df = df.dropna(axis=0)
df = df.astype(PLUTO_DTYPES)
df = df.rename(columns={'latitude': 'lat', 'longitude': 'lon'})

# convert coordinates to radians -> coordinate pairs
df['lat'] = np.radians(df['lat'])
df['lon'] = np.radians(df['lon'])
df['coordinates'] = list(zip(df['lat'], df['lon']))

# query existing tree of diner locs with lot locs to get distances
lot_locs = df[['lat', 'lon']].to_numpy()
dist_diner_rad, _ = tree.query(lot_locs, k=1)
# query() returns an (n_lots, k) array; flatten the single k=1 column
df['dist_diner_rad'] = dist_diner_rad.ravel()

# haversine distances are in radians; scale by the earth's radius in feet
EARTH_RADIUS_FT = 20902464
df['dist_diner_ft'] = df['dist_diner_rad'] * EARTH_RADIUS_FT

# get range for proper colorization: 20th..80th percentile, rounded to the
# nearest 100 ft
ddist_min = np.round(np.percentile(df['dist_diner_ft'], 20), -2)
ddist_max = np.round(np.percentile(df['dist_diner_ft'], 80), -2)
coloraxis_range = (ddist_min, ddist_max)
# PX choropleth map
# Each row of df is matched to a GeoJSON feature through its 'bbl' value
# (df column 'bbl' <-> feature 'properties.bbl') and shaded by its distance
# in feet to the nearest diner.
fig = px.choropleth_mapbox(
    df,
    geojson=mnhttn_map,
    locations='bbl',
    featureidkey='properties.bbl',
    # refer to the column by name, like the other column arguments
    color='dist_diner_ft',
    # clamp the color scale to the precomputed percentile range
    range_color=coloraxis_range,
    color_continuous_scale=px.colors.sequential.Viridis_r,
    mapbox_style='carto-positron',
    zoom=13,
    center={'lat': 40.7831, 'lon': -73.9712},
    hover_name='ownername',
    hover_data=['bldgarea', 'lotarea'],
    labels=var_renames,
    title='Distance to the Nearest Diner, Manhattan',
)

# display the figure, then save a copy as HTML
fig.show()
fig.write_html(file='./figure.html')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment