Skip to content

Instantly share code, notes, and snippets.

@hhl60492
Last active February 4, 2018 07:58
Show Gist options
  • Save hhl60492/5af44ad86d0b1d17ac09e1ba5f66a076 to your computer and use it in GitHub Desktop.
Save hhl60492/5af44ad86d0b1d17ac09e1ba5f66a076 to your computer and use it in GitHub Desktop.
import numpy as np
from sompy.sompy import SOMFactory
import pandas as pd
import glob
import os
# Locate the raw CSV exports. The directory is assembled with os.path so the
# script also works on POSIX systems, where a literal '..\\..\\data\\' is not
# interpreted as a directory path and the glob below would match nothing.
path = os.path.join(os.pardir, os.pardir, 'data')
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the combined frame reproducible between runs and machines.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
if not all_files:
    # Fail early with a readable message; otherwise pd.concat raises an opaque
    # "No objects to concatenate" error.
    raise FileNotFoundError(
        "No CSV files found in %s" % os.path.abspath(path)
    )
# Read every file lazily (skipping the first 31 lines of each) and stack the
# rows into a single frame with a fresh 0..n-1 index.
df_from_each_file = (pd.read_csv(f, skiprows=31) for f in all_files)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)
# Keep only the columns Lat, Long, Mean Temp, Max Temp, Min Temp, Precipitation.
data = concatenated_df[['Lat', 'Long', 'Tm', 'Tx', 'Tn', 'P']]
# Cells that cannot be parsed as numbers become NaN, and every row that
# contains at least one NaN is then discarded.
data = data.apply(pd.to_numeric, errors='coerce')
data = data.dropna(how='any')
# Display labels, in the same order as the selected columns. 'Tm' is the mean
# temperature column (see the column list above), so it is labelled "Mean".
names = [
    'Latitude',
    "longitude",
    'Monthly Mean temperature (C)',
    'Monthly Max temperature (C)',
    'Monthly Min temperature (C)',
    'Monthly total precipitation (mm)',
]
print(data.head())
# Build the self-organising map over the cleaned data and train it.
# The normalization and initialization arguments are the ones to vary when
# experimenting with different set-ups.
sm = SOMFactory().build(
    data.values,
    normalization='var',
    initialization='pca',
    component_names=names,
)
sm.train(n_job=1, verbose=False, train_rough_len=2, train_finetune_len=5)

# Two quality measures for the trained map:
#   * topographic error  - the proportion of all data vectors for which the
#                          first and second BMUs are not adjacent units;
#   * quantization error - the average distance between each data vector and
#                          its BMU.
topographic_error = sm.calculate_topographic_error()
# NOTE(review): this reads the second entry of the map's private `_bmu`
# attribute, presumably the per-sample BMU distances -- confirm against the
# installed sompy version.
quantization_error = np.mean(sm._bmu[1])
report = "Topographic error = %s; Quantization error = %s"
print(report % (topographic_error, quantization_error))
# Plotting helpers used by the three figures below.
from sompy.visualization.hitmap import HitMapView
from sompy.visualization.mapview import View2D
from sompy.visualization.umatrix import UMatrixView

# Figure 1: component planes, drawn for all dimensions with
# desnormalize=True.
view2D = View2D(10, 10, "rand data", text_size=12)
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True)

# Figure 2: the U-matrix of the trained map.
umat = UMatrixView(width=10, height=10, title='U-matrix')
umat.show(sm)

# Figure 3: cluster the SOM grid with a single run at k = K, then draw the
# hit map of the clustered grid.
K = 20  # number of clusters requested
sm.cluster(K)
hits = HitMapView(20, 20, "Clustering", text_size=12)
a = hits.show(sm)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment