Created
July 4, 2017 23:19
-
-
Save saliksyed/3986043a973123f33f0129c7a4d9182a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 5 00:19:18 2017
@author: saliksyed
"""
import io
import json

from sklearn import svm
def _load_json(path):
    """Parse the JSON document stored at *path* and return the decoded object.

    The file is opened as UTF-8 text inside a ``with`` block so the handle is
    closed as soon as parsing finishes, even if decoding raises.
    """
    with io.open(path, encoding="utf-8") as handle:
        return json.load(handle)


# Input data sets, all read relative to the current working directory.
routes = _load_json("routes.json")
country_to_country_code = _load_json("country_name_to_country_code.json")
populations = _load_json("population_by_country_code.json")
# NOTE(review): gdps is loaded but not read anywhere else in the visible
# script -- confirm whether it is still needed.
gdps = _load_json("gdp_by_country_code.json")
# Build one two-element feature vector per country code: the summed values
# of the country's "to" mapping followed by the summed values of its "from"
# mapping. Countries with no entry in country_to_country_code are left out.
country_code_to_feature = {}
for country, traffic in routes.items():
    if country not in country_to_country_code:
        continue
    to_total = sum(traffic["to"].values())
    from_total = sum(traffic["from"].values())
    code = country_to_country_code[country]
    country_code_to_feature[code] = [to_total, from_total]
# Labelling cut-off: a country is a positive example (label 1) when its
# population is strictly greater than this value, otherwise it is labelled 0.
POPULATION_THRESHOLD = 100000000

X = []
y = []
# Read the data into the X, y vectors.
for code in country_code_to_feature:
    # .get() treats a code that is absent from the population table the same
    # as an explicit null entry, so the example is skipped instead of
    # raising KeyError.
    population = populations.get(code)
    if population is None:  # skip examples without population data
        continue
    X.append(country_code_to_feature[code])
    y.append(1 if population > POPULATION_THRESHOLD else 0)
# Hold out part of the data: the first half (rounded down) is used for
# training and the remainder is kept to "test" whether the classifier
# actually works on new examples it hasn't seen before.
num_train = len(X) // 2
X_train = X[:num_train]
y_train = y[:num_train]
X_test = X[num_train:]
y_test = y[num_train:]

# Fit a support vector classifier using scikit-learn's default settings.
clf = svm.SVC()
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)
# A parenthesised single-argument print behaves the same as the print
# statement on Python 2 and is a valid function call on Python 3.
print(predictions)
# Now let's check our predictions: count the test examples whose predicted
# label equals the true label, then report the hit rate as a percentage.
correct = sum(
    1 for predicted, actual in zip(predictions, y_test) if predicted == actual
)
# Single-argument parenthesised prints produce the same output on Python 2
# and remain valid syntax on Python 3.
print("Percentage correct:")
print(float(correct) / len(predictions) * 100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment