Skip to content

Instantly share code, notes, and snippets.

@saliksyed
Created July 4, 2017 23:19
Show Gist options
  • Save saliksyed/3986043a973123f33f0129c7a4d9182a to your computer and use it in GitHub Desktop.
Save saliksyed/3986043a973123f33f0129c7a4d9182a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 5 00:19:18 2017
@author: saliksyed
"""
import json
from sklearn import svm
def _load_json(path):
    """Return the object parsed from the JSON file at *path*."""
    # The `with` block closes the handle even when parsing raises; the
    # previous `open(...).read()` one-liners never closed their files.
    with open(path) as handle:
        return json.load(handle)


# Input tables, all read from the current working directory.
routes = _load_json("routes.json")
country_to_country_code = _load_json("country_name_to_country_code.json")
populations = _load_json("population_by_country_code.json")
# NOTE(review): `gdps` is loaded but not referenced in the visible part of
# this script -- kept so the module-level name (and the file requirement)
# stay unchanged.
gdps = _load_json("gdp_by_country_code.json")
# Build one two-element feature vector per country code:
#   [sum of the country's "to" values, sum of its "from" values]
# Country names that have no entry in `country_to_country_code` are ignored.
country_code_to_feature = {}
for name, traffic in routes.items():
    if name not in country_to_country_code:
        continue
    to_total = sum(traffic["to"].values())
    from_total = sum(traffic["from"].values())
    country_code_to_feature[country_to_country_code[name]] = [to_total, from_total]
# A country whose population is strictly above this value is labelled 1,
# every other country is labelled 0.
POPULATION_THRESHOLD = 100000000

X = []
y = []
# Fill the feature matrix X and the label vector y in lockstep, so that
# X[i] and y[i] always describe the same country.
for code, feature in country_code_to_feature.items():
    # `.get` treats a code that is missing from the population table the
    # same way as an explicit null: the country is skipped instead of
    # aborting the whole run with a KeyError.
    population = populations.get(code)
    if population is None:
        continue  # skip examples without population data
    X.append(feature)
    y.append(1 if population > POPULATION_THRESHOLD else 0)
# Hold-out split: the first half of the examples (rounded down) is used for
# training, and the remainder is kept aside as a "test" set to see whether the
# algorithm actually works on examples it hasn't seen before.
# The split follows the existing order of X / y; nothing is shuffled here.
num_train = len(X) // 2
X_train, X_test = X[:num_train], X[num_train:]
y_train, y_test = y[:num_train], y[num_train:]
# Fit a support-vector classifier (constructed with no arguments, i.e. the
# library defaults) on the training half, then predict a label for every
# held-out example.
clf = svm.SVC()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
# Parenthesised single-argument form: prints the same thing under the
# Python 2 print statement and the Python 3 print function, whereas the bare
# `print predictions` statement is a SyntaxError on Python 3.
print(predictions)
# Now let's check our predictions: count the held-out examples whose
# predicted label equals the known label, pairing the two sequences by
# position.
correct = sum(
    1 for predicted, actual in zip(predictions, y_test) if predicted == actual
)
# Single-argument print(...) calls behave identically on Python 2 and 3.
print("Percentage correct:")
# float() keeps the division from truncating under Python 2 integer division.
print(float(correct) / len(predictions) * 100)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment