Skip to content

Instantly share code, notes, and snippets.

@matmoody
Created May 16, 2016 20:32
Show Gist options
  • Save matmoody/68fee9c8645f6b8c8fccb2c8b2d8e745 to your computer and use it in GitHub Desktop.
Save matmoody/68fee9c8645f6b8c8fccb2c8b2d8e745 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Load the loan data set from a CSV in the current working directory.
loan_data = pd.read_csv("loansData.csv")
loan_data.head()  # notebook preview; has no visible effect when run as a plain script

# Clean Interest.Rate column: drop the trailing '%' and convert to float.
# The vectorised .str accessor propagates missing values as NaN instead of
# raising inside a per-row lambda.
loan_data['Interest.Rate'] = loan_data['Interest.Rate'].str.rstrip('%').astype(float)

# Clean Loan.Length column: remove the literal " months" unit text.
# NOTE: str.strip(' months') strips any of the *characters* ' ', 'm', 'o', 'n',
# 't', 'h', 's' from both ends rather than the suffix, so it only worked by
# accident; replace the literal text instead, then trim stray whitespace.
# The value is intentionally left as a string, as before.
loan_data['Loan.Length'] = loan_data['Loan.Length'].str.replace(' months', '').str.strip()

# Clean and convert FICO.Range: keep the first three characters as an integer.
# Presumably the range is formatted "low-high" with a 3-digit lower bound --
# TODO confirm against the data.
loan_data['FICO.Score'] = loan_data['FICO.Range'].str[:3].astype(int)

# Review data to make sure cleaning and conversions look correct
plt.figure()
p = loan_data['FICO.Score'].hist()
plt.show()
# Build 10 cross-validation folds over the rows of loan_data.
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20, so prefer sklearn.model_selection and fall back to the legacy module
# only on old installations.
try:
    from sklearn.model_selection import KFold
except ImportError:
    from sklearn.cross_validation import KFold
    # Legacy API: the object itself iterates over (train_idx, test_idx) pairs.
    kf = KFold(len(loan_data), n_folds=10)
else:
    # Modern API: the splitter is no longer iterable, so materialise the
    # (train_idx, test_idx) pairs; `kf` can still be looped over directly or
    # passed as a `cv=` argument.
    kf = list(KFold(n_splits=10).split(loan_data))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment