Skip to content

Instantly share code, notes, and snippets.

# Each statement below selects the rows of sf_merged whose 'Cluster Labels'
# value equals the given integer, keeping the column at position 1 plus every
# column from position 5 onward.
# NOTE(review): the next two statements repeat the label-1 and label-0
# selections that appear again further down — confirm whether they are needed.
sf_merged.loc[sf_merged['Cluster Labels'] == 1, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]
sf_merged.loc[sf_merged['Cluster Labels'] == 0, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]
# Cluster 1 (rows with label 0)
sf_merged.loc[sf_merged['Cluster Labels'] == 0, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]
# Cluster 2 (rows with label 1)
sf_merged.loc[sf_merged['Cluster Labels'] == 1, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]
# Cluster 3 (rows with label 2)
sf_merged.loc[sf_merged['Cluster Labels'] == 2, sf_merged.columns[[1] + list(range(5, sf_merged.shape[1]))]]
# Cluster 4
# NOTE(review): no selection statement for this cluster is present here.
# build the base map centred on (latitude, longitude)
map_clusters = folium.Map(zoom_start=11, location=[latitude, longitude])
# derive one colour per cluster from the rainbow colormap
x = np.arange(kclusters)
ys = [k + x + (k * x) ** 2 for k in range(kclusters)]
# sample the colormap at len(ys) evenly spaced positions in [0, 1]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
# convert every sampled colour to its hex string
rainbow = list(map(colors.rgb2hex, colors_array))
# add markers to the map
# put the k-means label of each row in front of the sorted-venues table
neighborhoods_venues_sorted.insert(
    loc=0,
    column='Cluster Labels',
    value=kmeans.labels_,
)
# combine sf_data with the labelled venue table, matching rows on 'Neighborhood';
# merge() returns a new frame, so sf_data itself is left unmodified
sf_merged = sf_data.merge(neighborhoods_venues_sorted, on='Neighborhood')
sf_merged.head()
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5
# Remove the 'Neighborhood' column before clustering. The column axis is given
# by keyword: passing it positionally (``drop('Neighborhood', 1)``) was
# deprecated in pandas 1.3 and raises TypeError in pandas 2.x.
sf_grouped_clustering = sf_grouped.drop(columns='Neighborhood')
# run k-means clustering (random_state fixed so repeated runs give the same labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(sf_grouped_clustering)
# check cluster labels generated for the first 10 rows of the dataframe
kmeans.labels_[0:10]
# number of "most common venue" columns to create
num_top_venues = 10
# ordinal suffixes for ranks 1-3; every later rank falls back to 'th'
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues
columns = ['Neighborhood']
for ind in range(num_top_venues):
    # 'st', 'nd', 'rd' exist only for the top 3 venues; an explicit bounds
    # check avoids indexing past the end of `indicators` for ranks 4 and up
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest values in ``row``.

    The first entry of ``row`` is skipped by position; presumably it holds
    the neighborhood name rather than a numeric value (confirm against the
    caller). The remaining entries are sorted in descending order.

    Args:
        row: pandas Series whose entries after the first are sortable values.
        num_top_venues: maximum number of labels to return.

    Returns:
        Array of index labels for the highest-valued entries, ordered from
        largest to smallest. It holds fewer than ``num_top_venues`` items
        when the row has fewer remaining entries.
    """
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]
# per-neighborhood mean of every remaining column in sf_onehot
sf_grouped = sf_onehot.groupby('Neighborhood').mean()
# turn the 'Neighborhood' group key back into an ordinary column
sf_grouped = sf_grouped.reset_index()
sf_grouped.head()
# one-hot encode the venue categories; empty prefix/separator keeps the
# category names themselves as the column names
sf_onehot = pd.get_dummies(
    sf_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)
# re-attach the neighborhood of each venue
sf_onehot['Neighborhood'] = sf_venues['Neighborhood']
# reorder so the last column comes first, the rest keeping their order
fixed_columns = list(sf_onehot.columns[-1:]) + list(sf_onehot.columns[:-1])
sf_onehot = sf_onehot[fixed_columns]