Skip to content

Instantly share code, notes, and snippets.

@jpswinski
Last active June 8, 2022 13:26
Show Gist options
  • Save jpswinski/0ff2ab9adfad22198fcee4b1e59e66e1 to your computer and use it in GitHub Desktop.
Save jpswinski/0ff2ab9adfad22198fcee4b1e59e66e1 to your computer and use it in GitHub Desktop.
Demonstrate using k-means clustering on a GeoDataFrame
# Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
import geopandas as gp
# Load the shapefile into a GeoDataFrame (one row per geometry).
# NOTE(review): the file name suggests the RGI 6.0 Alaska region — confirm.
gdf = gp.read_file("01_rgi60_Alaska.shp")

# Draw every geometry in solid black on a single large axes as a quick
# visual check of the data before clustering.
fig, ax = plt.subplots(nrows=1, figsize=(20, 15))
gdf.plot(color='black', ax=ax)
plt.show()
# Build the feature matrix to cluster: one (CenLon, CenLat) row per geometry,
# taken from the centroid columns already present in the shapefile.
# NOTE(review): k-means uses plain Euclidean distance on these values; if they
# are geographic degrees, east-west separation is overstated at high
# latitudes — confirm whether projected coordinates would be more appropriate.
X = np.column_stack((gdf["CenLon"], gdf["CenLat"]))

# Elbow method: fit k-means for each candidate cluster count and record the
# within-cluster sum of squares (WCSS, exposed by scikit-learn as `inertia_`).
# The "elbow" where the curve stops dropping sharply suggests a good count.
cluster_counts = range(1, 14)  # single definition shared by the loop and the plot
wcss = []
for n_clusters in cluster_counts:
    # n_init is pinned explicitly so results do not change with the
    # scikit-learn version (the library default moved from 10 to 'auto').
    model = KMeans(
        n_clusters=n_clusters,
        init='k-means++',
        n_init=10,
        random_state=42,
    )
    model.fit(X)
    wcss.append(model.inertia_)

# Plot WCSS against the number of clusters.
plt.plot(cluster_counts, wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
# Run k-means with the chosen number of clusters and get one label per row
# of X (and therefore per geometry in gdf, in the same order).
# NOTE(review): 15 lies outside the 1-13 range explored by the elbow plot —
# confirm this is the intended cluster count.
# n_init is pinned explicitly so results do not change with the scikit-learn
# version (the library default moved from 10 to 'auto').
kmeans = KMeans(
    n_clusters=15,
    init='k-means++',
    n_init=10,
    random_state=5,
    max_iter=400,
)
y_kmeans = kmeans.fit_predict(X)

# Attach the labels positionally. Joining a separate DataFrame aligns on the
# index, which misassigns labels if gdf's index is not 0..n-1 and raises
# on re-run because the 'cluster' column already exists.
gdf["cluster"] = y_kmeans

# Plot the GeoDataFrame coloured by cluster label.
fig, ax = plt.subplots(1, figsize=(20, 15))
gdf.plot(column='cluster', cmap='tab20', ax=ax)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment