Last active
June 8, 2022 13:26
-
-
Save jpswinski/0ff2ab9adfad22198fcee4b1e59e66e1 to your computer and use it in GitHub Desktop.
Demonstrates k-means clustering on a GeoDataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Imports | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
from sklearn.cluster import KMeans | |
import geopandas as gp | |
# Load the shapefile into a GeoDataFrame.
# NOTE(review): the file name suggests Randolph Glacier Inventory 6.0,
# region 01 (Alaska) -- confirm the data source.
gdf = gp.read_file("01_rgi60_Alaska.shp")

# Draw every geometry in solid black on a single large (20 x 15 inch) axes.
f, ax = plt.subplots(figsize=(20, 15))
gdf.plot(color='black', ax=ax)
plt.show()
# Create dataset to cluster (centroids of each geometry): one
# (longitude, latitude) row per feature, read from the CenLon / CenLat columns.
# NOTE(review): KMeans measures Euclidean distance, so degrees are treated as
# planar coordinates here -- confirm that distortion is acceptable for this data.
X = np.column_stack((gdf["CenLon"], gdf["CenLat"]))

# Find optimal number of clusters (knee in the elbow): fit one model per
# candidate cluster count and record its within-cluster sum of squares.
cluster_counts = range(1, 14)
wcss = []
for i in cluster_counts:
    # n_init is pinned to 10 because scikit-learn 1.4 changed the default to
    # 'auto' (a single run with k-means++); being explicit keeps the curve
    # identical across library versions and avoids the 1.2/1.3 FutureWarning.
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Plot inertia against cluster count; the bend in the curve suggests a good k.
plt.plot(cluster_counts, wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
# Run k means clustering algorithm with the chosen number of clusters.
# NOTE(review): 15 lies outside the 1-13 range scanned by the elbow plot --
# confirm this is the intended cluster count.
# n_init is pinned to 10 because scikit-learn 1.4 changed the default to
# 'auto' (a single run with k-means++); being explicit keeps results stable
# across library versions.
kmeans = KMeans(
    n_clusters=15,
    init='k-means++',
    n_init=10,
    random_state=5,
    max_iter=400,
)
y_kmeans = kmeans.fit_predict(X)

# Attach the labels positionally as a 'cluster' column. assign() does not
# depend on gdf having a default 0..n-1 index (an index-based join would
# silently produce NaN otherwise) and it overwrites an existing 'cluster'
# column instead of raising when this section is re-run.
gdf = gdf.assign(cluster=y_kmeans)
# Plot the GeoDataFrame again, this time colouring each geometry by the
# value in its 'cluster' column using the 'tab20' colormap.
f, ax = plt.subplots(figsize=(20, 15))
gdf.plot(ax=ax, column='cluster', cmap='tab20')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment