saliksyed · July 6, 2017 00:50
diff --git a/kmeans_final.py b/kmeans_final.py
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 """
 Created on Wed Jul  5 00:19:18 2017
 @author: saliksyed
 """
 import json
 from sklearn.cluster import KMeans

 # Load the route table. The code below relies on every entry having a "to"
 # mapping whose keys are destination countries.
 with open("routes.json") as routes_file:
     country_routes = json.load(routes_file)

 # Give each destination country a column index, in first-seen order.
 countries_to_index = {}
 for country in country_routes:
     for key in country_routes[country]["to"]:
         # setdefault leaves an existing index untouched; the default is the
         # current size of the map, i.e. the next unused index.
         countries_to_index.setdefault(key, len(countries_to_index))

 # Indices run 0..N-1, so the vector width is exactly N -- no spare,
 # always-zero trailing column.
 total_countries = len(countries_to_index)

 # One binary vector per origin country: 1 in the column of every
 # destination listed under its "to" mapping, 0 elsewhere.
 vectors = []
 countries = []
 for country in country_routes:
     vec = [0] * total_countries
     for key in country_routes[country]["to"]:
         vec[countries_to_index[key]] = 1
     vectors.append(vec)
     countries.append(country)

 # NOTE(review): n_clusters is fixed at 10; presumably routes.json holds at
 # least 10 origin countries -- confirm against the data.
 kmeans = KMeans(n_clusters=10).fit(vectors)

 # zip the pairs and sort them based on their cluster label
 pairs = sorted(zip(countries, kmeans.labels_), key=lambda x: x[1])

 # The parenthesised form prints the same text under Python 2 and also
 # parses under Python 3.
 print(pairs)
	#!/usr/bin/env python2
	# -*- coding: utf-8 -*-
	"""
	Created on Wed Jul 5 00:19:18 2017
	@author: saliksyed
	"""
	import json
	from sklearn.cluster import KMeans

	# Load the route table. The code below relies on every entry having a "to"
	# mapping whose keys are destination countries.
	with open("routes.json") as routes_file:
	    country_routes = json.load(routes_file)

	# Give each destination country a column index, in first-seen order.
	countries_to_index = {}
	for country in country_routes:
	    for key in country_routes[country]["to"]:
	        # setdefault leaves an existing index untouched; the default is the
	        # current size of the map, i.e. the next unused index.
	        countries_to_index.setdefault(key, len(countries_to_index))

	# Indices run 0..N-1, so the vector width is exactly N -- no spare,
	# always-zero trailing column.
	total_countries = len(countries_to_index)

	# One binary vector per origin country: 1 in the column of every
	# destination listed under its "to" mapping, 0 elsewhere.
	vectors = []
	countries = []
	for country in country_routes:
	    vec = [0] * total_countries
	    for key in country_routes[country]["to"]:
	        vec[countries_to_index[key]] = 1
	    vectors.append(vec)
	    countries.append(country)

	# NOTE(review): n_clusters is fixed at 10; presumably routes.json holds at
	# least 10 origin countries -- confirm against the data.
	kmeans = KMeans(n_clusters=10).fit(vectors)

	# zip the pairs and sort them based on their cluster label
	pairs = sorted(zip(countries, kmeans.labels_), key=lambda x: x[1])

	# The parenthesised form prints the same text under Python 2 and also
	# parses under Python 3.
	print(pairs)