jpswinski · June 8, 2022 13:26
diff --git a/kmeans_demo.py b/kmeans_demo.py
 # Imports
 import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 from sklearn.cluster import KMeans
 import geopandas as gp

 # Load the shapefile into a GeoDataFrame.
 gdf = gp.read_file("01_rgi60_Alaska.shp")

 # Draw every geometry in black on one large figure so the raw input can be
 # inspected before any clustering is applied.
 f, ax = plt.subplots(nrows=1, figsize=(20, 15))
 gdf.plot(color='black', ax=ax)
 plt.show()

 # Assemble the clustering input: one row per GeoDataFrame record, with the
 # "CenLon" and "CenLat" (centroid) columns placed side by side.
 lon = gdf["CenLon"]
 lat = gdf["CenLat"]
 X = np.column_stack((lon, lat))

 # Elbow method: fit k-means once per candidate cluster count and collect the
 # fitted model's inertia_; the bend ("knee") in the resulting curve suggests
 # a reasonable number of clusters.
 # NOTE(review): the final model further down uses n_clusters=15, which lies
 # outside the 1..13 range scanned here - confirm that is intended.
 candidate_ks = range(1, 14)
 wcss = [
     KMeans(n_clusters=n, init='k-means++', random_state=42).fit(X).inertia_
     for n in candidate_ks
 ]

 # Plot inertia against the number of clusters.
 plt.plot(candidate_ks, wcss)
 plt.title('The Elbow Method')
 plt.xlabel('Number of clusters')
 plt.ylabel('WCSS')
 plt.show()

 # Fit the final k-means model (15 clusters) and get one label per row of X.
 kmeans = KMeans(
     n_clusters=15,
     init='k-means++',
     random_state=5,
     max_iter=400,
 )
 y_kmeans = kmeans.fit_predict(X)

 # Attach the labels as a 'cluster' column. join() aligns on the index, so
 # this assumes gdf still carries its default 0..n-1 index - TODO confirm.
 k = pd.DataFrame({'cluster': y_kmeans})
 gdf = gdf.join(k)

 # Render the geometries again, this time coloured by the 'cluster' column
 # using the 'tab20' colormap.
 f, ax = plt.subplots(nrows=1, figsize=(20, 15))
 gdf.plot(ax=ax, column='cluster', cmap='tab20')
 plt.show()
	# Imports
	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd
	from sklearn.cluster import KMeans
	import geopandas as gp

	# Load the shapefile into a GeoDataFrame.
	gdf = gp.read_file("01_rgi60_Alaska.shp")

	# Draw every geometry in black on one large figure so the raw input can be
	# inspected before any clustering is applied.
	f, ax = plt.subplots(nrows=1, figsize=(20, 15))
	gdf.plot(color='black', ax=ax)
	plt.show()

	# Assemble the clustering input: one row per GeoDataFrame record, with the
	# "CenLon" and "CenLat" (centroid) columns placed side by side.
	lon = gdf["CenLon"]
	lat = gdf["CenLat"]
	X = np.column_stack((lon, lat))

	# Elbow method: fit k-means once per candidate cluster count and record the
	# fitted model's inertia_; the bend ("knee") in the resulting curve suggests
	# a reasonable number of clusters.
	# NOTE(review): the final model further down uses n_clusters=15, which lies
	# outside the 1..13 range scanned here - confirm that is intended.
	candidate_ks = range(1, 14)
	wcss = []
	for i in candidate_ks:
		# These three statements form the loop body and must be indented under
		# the `for`; at the same level as the header they are a syntax error.
		kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
		kmeans.fit(X)
		wcss.append(kmeans.inertia_)

	# Plot inertia against the number of clusters, reusing the same range so
	# the x values cannot drift out of step with the loop above.
	plt.plot(candidate_ks, wcss)
	plt.title('The Elbow Method')
	plt.xlabel('Number of clusters')
	plt.ylabel('WCSS')
	plt.show()

	# Fit the final k-means model (15 clusters) and get one label per row of X.
	kmeans = KMeans(
		n_clusters=15,
		init='k-means++',
		random_state=5,
		max_iter=400,
	)
	y_kmeans = kmeans.fit_predict(X)

	# Attach the labels as a 'cluster' column. join() aligns on the index, so
	# this assumes gdf still carries its default 0..n-1 index - TODO confirm.
	k = pd.DataFrame({'cluster': y_kmeans})
	gdf = gdf.join(k)

	# Render the geometries again, this time coloured by the 'cluster' column
	# using the 'tab20' colormap.
	f, ax = plt.subplots(nrows=1, figsize=(20, 15))
	gdf.plot(ax=ax, column='cluster', cmap='tab20')
	plt.show()