Skip to content

Instantly share code, notes, and snippets.

@saliksyed
Created July 6, 2017 00:50
Show Gist options
  • Save saliksyed/172da30f77eb6ca3fb8d951655bcfd3e to your computer and use it in GitHub Desktop.
Save saliksyed/172da30f77eb6ca3fb8d951655bcfd3e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 5 00:19:18 2017
@author: saliksyed
"""
import json
from sklearn.cluster import KMeans
# Load the routes table. Each top-level key is treated as a country, and each
# entry's "to" mapping is keyed by other countries (presumably the route
# destinations -- confirm against routes.json).
with open("routes.json") as routes_file:
    country_routes = json.load(routes_file)

# Assign a dense column index to every country that appears as a "to" key.
countries_to_index = {}
idx = 0
for country in country_routes:
    for key in country_routes[country]["to"]:
        if key not in countries_to_index:
            countries_to_index[key] = idx
            idx += 1

# idx already equals the number of indexed countries (indices run from 0 to
# idx - 1), so adding 1 would only append a column that is always zero.
total_countries = idx

# Build one binary vector per country: 1 in the column of every country found
# in its "to" mapping, 0 everywhere else. `countries` keeps the row order so
# the labels can be matched back to names afterwards.
vectors = []
countries = []
for country in country_routes:
    vec = [0] * total_countries
    for key in country_routes[country]["to"]:
        vec[countries_to_index[key]] = 1
    vectors.append(vec)
    countries.append(country)

# NOTE: KMeans needs at least n_clusters samples, i.e. at least 10 countries.
kmeans = KMeans(n_clusters=10).fit(vectors)

# zip the pairs and sort them based on their cluster label
pairs = sorted(zip(countries, kmeans.labels_), key=lambda x: x[1])

# A single parenthesised argument prints identically under the Python 2 print
# statement and is also valid as the Python 3 print function.
print(pairs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment