saliksyed · July 4, 2017 23:19
diff --git a/final_svm.py b/final_svm.py
 #!/usr/bin/env python2
 # -*- coding: utf-8 -*-
 """
 Created on Wed Jul  5 00:19:18 2017

 @author: saliksyed
 """
 import json
 from sklearn import svm

 # Load the JSON input tables. Each file is opened in a `with` block so its
 # handle is closed as soon as the contents have been parsed.

 # routes: keyed by country name; each entry has "to" and "from" mappings.
 with open("routes.json") as routes_file:
     routes = json.load(routes_file)

 # country name -> country code
 with open("country_name_to_country_code.json") as codes_file:
     country_to_country_code = json.load(codes_file)

 # country code -> population (entries may be null / None)
 with open("population_by_country_code.json") as populations_file:
     populations = json.load(populations_file)

 # NOTE(review): gdps is not used in the part of the script visible here; it
 # is still loaded so the file is required to exist and parse, as before.
 with open("gdp_by_country_code.json") as gdps_file:
     gdps = json.load(gdps_file)

 # Map each known country code to a two-element feature vector:
 # [sum of the country's "to" values, sum of its "from" values].
 country_code_to_feature = {}
 for country, flows in routes.items():
     if country not in country_to_country_code:
         continue  # no code is known for this country name, so drop it
     code = country_to_country_code[country]
     to_amount = sum(flows["to"].values())
     from_amount = sum(flows["from"].values())
     country_code_to_feature[code] = [to_amount, from_amount]
        

 # Countries whose population exceeds this value are labelled 1, all others 0.
 POPULATION_THRESHOLD = 100000000

 X = []  # feature rows: [to_amount, from_amount]
 y = []  # class labels, kept index-aligned with X

 # read the data in to the X, y vectors:
 for code in country_code_to_feature:
     # .get() so that a country code missing from the population table is
     # skipped instead of aborting the whole script with a KeyError.
     population = populations.get(code)
     if population is None:  # skip examples without population data
         continue
     X.append(country_code_to_feature[code])
     y.append(1 if population > POPULATION_THRESHOLD else 0)
    

 # Split the examples in two: the first half is used as training data and
 # the second half is held back as a "test" set, to check whether the
 # algorithm actually works on examples it hasn't seen before.

 num_train = len(X) // 2

 X_train, X_test = X[:num_train], X[num_train:]
 y_train, y_test = y[:num_train], y[num_train:]
    
 # Fit a support vector classifier (default settings) on the training half.
 clf = svm.SVC()
 clf.fit(X_train, y_train)

 # Predict a label for every held-out example and show the raw predictions.
 predictions = clf.predict(X_test)
 print(predictions)

 # Now let's check our predictions: count the positions where the predicted
 # label matches the true label.
 correct = sum(
     1 for predicted, actual in zip(predictions, y_test) if predicted == actual
 )

 print("Percentage correct:")
 print(float(correct) / len(predictions) * 100)
	#!/usr/bin/env python2
	# -*- coding: utf-8 -*-
	"""
	Created on Wed Jul 5 00:19:18 2017

	@author: saliksyed
	"""
	import json
	from sklearn import svm

	# Load the JSON input tables. Each file is opened in a `with` block so its
	# handle is closed as soon as the contents have been parsed.

	# routes: keyed by country name; each entry has "to" and "from" mappings.
	with open("routes.json") as routes_file:
	    routes = json.load(routes_file)

	# country name -> country code
	with open("country_name_to_country_code.json") as codes_file:
	    country_to_country_code = json.load(codes_file)

	# country code -> population (entries may be null / None)
	with open("population_by_country_code.json") as populations_file:
	    populations = json.load(populations_file)

	# NOTE(review): gdps is not used in the part of the script visible here; it
	# is still loaded so the file is required to exist and parse, as before.
	with open("gdp_by_country_code.json") as gdps_file:
	    gdps = json.load(gdps_file)

	# Map each known country code to a two-element feature vector:
	# [sum of the country's "to" values, sum of its "from" values].
	country_code_to_feature = {}
	for country in routes:
	    # Country names with no known code are left out of the feature table.
	    if country in country_to_country_code:
	        code = country_to_country_code[country]
	        to_amount = sum(routes[country]["to"].values())
	        from_amount = sum(routes[country]["from"].values())
	        country_code_to_feature[code] = [to_amount, from_amount]


	# Countries whose population exceeds this value are labelled 1, all others 0.
	POPULATION_THRESHOLD = 100000000

	X = []  # feature rows: [to_amount, from_amount]
	y = []  # class labels, kept index-aligned with X

	# read the data in to the X, y vectors:
	for code in country_code_to_feature:
	    # .get() so that a country code missing from the population table is
	    # skipped instead of aborting the whole script with a KeyError.
	    population = populations.get(code)
	    if population is None:  # skip examples without population data
	        continue
	    X.append(country_code_to_feature[code])
	    y.append(1 if population > POPULATION_THRESHOLD else 0)


	# Split the examples in two: the first half is used as training data and
	# the second half is held back as a "test" set, to check whether the
	# algorithm actually works on examples it hasn't seen before.

	num_train = len(X) // 2

	X_train, X_test = X[:num_train], X[num_train:]
	y_train, y_test = y[:num_train], y[num_train:]

	# Fit a support vector classifier (default settings) on the training half.
	clf = svm.SVC()
	clf.fit(X_train, y_train)

	# Predict a label for every held-out example and show the raw predictions.
	predictions = clf.predict(X_test)
	print(predictions)

	# Now let's check our predictions: count the positions where the predicted
	# label matches the true label.
	correct = sum(
	    1 for predicted, actual in zip(predictions, y_test) if predicted == actual
	)

	print("Percentage correct:")
	print(float(correct) / len(predictions) * 100)