Created
July 6, 2017 00:50
-
-
Save saliksyed/172da30f77eb6ca3fb8d951655bcfd3e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 5 00:19:18 2017
@author: saliksyed
"""
import json | |
from sklearn.cluster import KMeans | |
def index_destinations(country_routes):
    """Assign a dense column index to every key seen in a "to" mapping.

    ``country_routes`` maps a country to a dict whose ``"to"`` entry is
    itself a mapping; the keys of that inner mapping are the names that
    get indexed (presumably the countries it has routes to). Indices are
    handed out in first-seen order, starting at 0.

    Returns a dict mapping each such key to its column index.
    """
    countries_to_index = {}
    for route in country_routes.values():
        for destination in route["to"]:
            if destination not in countries_to_index:
                # The current size is the next unused index.
                countries_to_index[destination] = len(countries_to_index)
    return countries_to_index


def build_route_vectors(country_routes, countries_to_index):
    """Build one binary feature vector per country.

    Each vector has exactly ``len(countries_to_index)`` entries; entry
    ``countries_to_index[d]`` is 1 when ``d`` is a key of the country's
    ``"to"`` mapping and 0 otherwise.

    Returns ``(countries, vectors)`` as two parallel lists.
    """
    # Use the real number of indexed names. The previous ``idx + 1``
    # added a spurious, always-zero trailing column to every vector.
    total_countries = len(countries_to_index)
    countries = []
    vectors = []
    for country, route in country_routes.items():
        vec = [0] * total_countries
        for destination in route["to"]:
            vec[countries_to_index[destination]] = 1
        countries.append(country)
        vectors.append(vec)
    return countries, vectors


if __name__ == "__main__":
    # Close the file deterministically instead of leaking the handle.
    with open("routes.json") as routes_file:
        country_routes = json.load(routes_file)

    countries_to_index = index_destinations(country_routes)
    countries, vectors = build_route_vectors(
        country_routes, countries_to_index
    )

    kmeans = KMeans(n_clusters=10).fit(vectors)

    # Pair each country with its cluster label and sort by label so that
    # members of the same cluster are printed next to each other.
    pairs = sorted(zip(countries, kmeans.labels_), key=lambda x: x[1])
    # Parenthesised form prints the same thing on Python 2 and Python 3.
    print(pairs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment