Last active
February 4, 2018 07:58
-
-
Save hhl60492/5af44ad86d0b1d17ac09e1ba5f66a076 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train a self-organizing map (SOM) on monthly climate records and plot it.
#
# Pipeline:
#   1. read every CSV in the data folder and concatenate them,
#   2. keep the latitude/longitude/temperature/precipitation columns,
#   3. train a SOM on those columns with sompy,
#   4. report the topographic and quantization errors,
#   5. draw the component planes, the U-matrix and a K-means hit map.
import glob
import os

import numpy as np
import pandas as pd
from sompy.sompy import SOMFactory
from sompy.visualization.hitmap import HitMapView
from sompy.visualization.mapview import View2D
from sompy.visualization.umatrix import UMatrixView

# read in all csvs from folder
# Built with os.path.join so the relative path also resolves on non-Windows
# systems (a literal '..\\..\\data\\' only works on Windows).
path = os.path.join(os.pardir, os.pardir, 'data')
all_files = glob.glob(os.path.join(path, "*.csv"))
if not all_files:
    # pd.concat() on an empty sequence fails with an unhelpful
    # "No objects to concatenate"; report the actual problem instead.
    raise FileNotFoundError("No CSV files found in %r" % os.path.abspath(path))

# concat into one df
# The first 31 lines of each file are skipped before the header row is read
# (presumably a preamble/legend in the source files -- confirm against data).
df_from_each_file = (pd.read_csv(f, skiprows=31) for f in all_files)
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)

# get columns Lat, Long, Mean Temp, Max Temp, Min temp, Precipitation
data = concatenated_df[['Lat', 'Long', 'Tm', 'Tx', 'Tn', 'P']]
# Non-numeric cells become NaN, then any row with a NaN is discarded.
data = data.apply(pd.to_numeric, errors='coerce')
data = data.dropna(how='any')

# Display names for the six selected columns, in the same order as `data`.
# 'Tm' is the mean temperature, so it is labelled "Mean" (not "Median").
names = [
    'Latitude',
    "longitude",
    'Monthly Mean temperature (C)',
    'Monthly Max temperature (C)',
    'Monthly Min temperature (C)',
    'Monthly total precipitation (mm)',
]
print(data.head())

# create the SOM network and train it. You can experiment with different normalizations and initializations
sm = SOMFactory().build(data.values, normalization='var', initialization='pca', component_names=names)
sm.train(n_job=1, verbose=False, train_rough_len=2, train_finetune_len=5)

# The quantization error: average distance between each data vector and its BMU.
# The topographic error: the proportion of all data vectors for which first and second BMUs are not adjacent units.
topographic_error = sm.calculate_topographic_error()
# NOTE(review): reads sompy's private `_bmu` attribute; row 1 is assumed to
# hold the per-sample BMU distances -- confirm against the installed sompy.
quantization_error = np.mean(sm._bmu[1])
print("Topographic error = %s; Quantization error = %s" % (topographic_error, quantization_error))

# component planes view
# NOTE(review): the "rand data" title looks like a leftover from an example;
# it is kept unchanged here.
view2D = View2D(10, 10, "rand data", text_size=12)
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True)

# U-matrix plot
umat = UMatrixView(width=10, height=10, title='U-matrix')
umat.show(sm)

# do the K-means clustering on the SOM grid with a single, fixed k
# (no sweep over k is performed; change K to try other cluster counts)
K = 20  # number of clusters
sm.cluster(K)
hits = HitMapView(20, 20, "Clustering", text_size=12)
hits.show(sm)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment