Created
May 23, 2012 03:15
-
-
Save ryanswanstrom/2773067 to your computer and use it in GitHub Desktop.
A simple logistic regression solution to the Kaggle Biological Response Competition
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| from sklearn.linear_model import LogisticRegression | |
| import csv_io | |
| import math | |
| import scipy | |
def main():
    """Fit a logistic regression on train.csv and score test.csv.

    Rows are loaded with ``csv_io.read_data``. For each training row the
    first column is taken as the response and the remaining columns as the
    features. The model is then applied to the rows of test.csv, and the
    second column of ``predict_proba`` for each row is written, formatted
    with ``%f``, to log_solution.csv via ``csv_io.write_delimited_file``.
    """
    # Load the labelled rows, then split them into responses and features.
    labelled_rows = csv_io.read_data("train.csv")
    responses = [row[0] for row in labelled_rows]
    features = [row[1:] for row in labelled_rows]

    # Load the rows that need predictions.
    realtest = csv_io.read_data("test.csv")

    # Train the classifier and get per-class probabilities for the test rows.
    model = LogisticRegression()
    model.fit(features, responses)
    class_probs = model.predict_proba(realtest)

    # Keep only column 1 of each probability row, rendered as text.
    formatted = []
    for prob_row in class_probs:
        formatted.append("%f" % prob_row[1])
    csv_io.write_delimited_file("log_solution.csv", formatted)

    print('Logistic Regression Complete! Submit log_solution.csv to Kaggle')
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment